From 41d81b996fdd8276cc04e750297a12f852a97bf4 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 10 May 2010 15:32:02 -0700 Subject: Test cases for MediaFile::getUploadedFileType() with OpenOffice, MS Office, and PDF sample files (as saved from OpenOffice 3.2) Only 3 of 16 cases pass on my dev box with default config. Ouch! --- tests/MediaFileTest.php | 77 +++++++++++++++++++++++++++++++++ tests/sample-uploads/office.pdf | Bin 0 -> 1162 bytes tests/sample-uploads/presentation.odp | Bin 0 -> 9330 bytes tests/sample-uploads/presentation.otp | Bin 0 -> 9359 bytes tests/sample-uploads/presentation.pot | Bin 0 -> 71168 bytes tests/sample-uploads/presentation.potm | Bin 0 -> 5789 bytes tests/sample-uploads/presentation.ppt | Bin 0 -> 71168 bytes tests/sample-uploads/presentation.pptx | Bin 0 -> 5790 bytes tests/sample-uploads/spreadsheet.ods | Bin 0 -> 6560 bytes tests/sample-uploads/spreadsheet.ots | Bin 0 -> 6575 bytes tests/sample-uploads/spreadsheet.xls | Bin 0 -> 6656 bytes tests/sample-uploads/spreadsheet.xlsx | Bin 0 -> 6010 bytes tests/sample-uploads/spreadsheet.xlt | Bin 0 -> 6144 bytes tests/sample-uploads/wordproc.doc | Bin 0 -> 9216 bytes tests/sample-uploads/wordproc.docx | Bin 0 -> 3350 bytes tests/sample-uploads/wordproc.odt | Bin 0 -> 7641 bytes tests/sample-uploads/wordproc.ott | Bin 0 -> 7656 bytes tests/sample-uploads/wordproc.rtf | 16 +++++++ 18 files changed, 93 insertions(+) create mode 100644 tests/MediaFileTest.php create mode 100644 tests/sample-uploads/office.pdf create mode 100644 tests/sample-uploads/presentation.odp create mode 100644 tests/sample-uploads/presentation.otp create mode 100644 tests/sample-uploads/presentation.pot create mode 100644 tests/sample-uploads/presentation.potm create mode 100644 tests/sample-uploads/presentation.ppt create mode 100644 tests/sample-uploads/presentation.pptx create mode 100644 tests/sample-uploads/spreadsheet.ods create mode 100644 tests/sample-uploads/spreadsheet.ots create mode 100644 
tests/sample-uploads/spreadsheet.xls create mode 100644 tests/sample-uploads/spreadsheet.xlsx create mode 100644 tests/sample-uploads/spreadsheet.xlt create mode 100644 tests/sample-uploads/wordproc.doc create mode 100644 tests/sample-uploads/wordproc.docx create mode 100644 tests/sample-uploads/wordproc.odt create mode 100644 tests/sample-uploads/wordproc.ott create mode 100644 tests/sample-uploads/wordproc.rtf diff --git a/tests/MediaFileTest.php b/tests/MediaFileTest.php new file mode 100644 index 000000000..6fe995621 --- /dev/null +++ b/tests/MediaFileTest.php @@ -0,0 +1,77 @@ +old_attachments_supported = common_config('attachments', 'supported'); + $GLOBALS['config']['attachments']['supported'] = true; + } + + public function tearDown() + { + $GLOBALS['config']['attachments']['supported'] = $this->old_attachments_supported; + } + + /** + * @dataProvider fileTypeCases + * + */ + public function testFileType($filename, $expectedType) + { + if (!file_exists($filename)) { + throw new Exception("WTF? 
$filename test file missing"); + } + $this->assertEquals($expectedType, MediaFile::getUploadedFileType($filename)); + } + + static public function fileTypeCases() + { + $base = dirname(__FILE__); + $dir = "$base/sample-uploads"; + return array( + array("$dir/office.pdf", "application/pdf"), + + array("$dir/wordproc.odt", "application/vnd.oasis.opendocument.text"), + array("$dir/wordproc.ott", "application/vnd.oasis.opendocument.text-template"), + array("$dir/wordproc.doc", "application/msword"), + array("$dir/wordproc.docx", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document"), + array("$dir/wordproc.rtf", "text/rtf"), + + array("$dir/spreadsheet.ods", + "application/vnd.oasis.opendocument.spreadsheet"), + array("$dir/spreadsheet.ots", + "application/vnd.oasis.opendocument.spreadsheet-template"), + array("$dir/spreadsheet.xls", "application/vnd.ms-excel"), + array("$dir/spreadsheet.xlt", "application/vnd.ms-excel"), + array("$dir/spreadsheet.xlsx", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), + + array("$dir/presentation.odp", + "application/vnd.oasis-opendocument.presentation"), + array("$dir/presentation.otp", + "application/vnd.oasis-opendocument.presentation-template"), + array("$dir/presentation.ppt", + "application/vnd.ms-powerpoint"), + array("$dir/presentation.pot", + "application/vnd.ms-powerpoint"), + array("$dir/presentation.pptx", + "application/vnd.openxmlformats-officedocument.presentationml.presentation"), + ); + } + +} + diff --git a/tests/sample-uploads/office.pdf b/tests/sample-uploads/office.pdf new file mode 100644 index 000000000..670bc2343 Binary files /dev/null and b/tests/sample-uploads/office.pdf differ diff --git a/tests/sample-uploads/presentation.odp b/tests/sample-uploads/presentation.odp new file mode 100644 index 000000000..8dd3a428b Binary files /dev/null and b/tests/sample-uploads/presentation.odp differ diff --git a/tests/sample-uploads/presentation.otp 
b/tests/sample-uploads/presentation.otp new file mode 100644 index 000000000..1927ee79d Binary files /dev/null and b/tests/sample-uploads/presentation.otp differ diff --git a/tests/sample-uploads/presentation.pot b/tests/sample-uploads/presentation.pot new file mode 100644 index 000000000..f5124ffa2 Binary files /dev/null and b/tests/sample-uploads/presentation.pot differ diff --git a/tests/sample-uploads/presentation.potm b/tests/sample-uploads/presentation.potm new file mode 100644 index 000000000..ade1bcb10 Binary files /dev/null and b/tests/sample-uploads/presentation.potm differ diff --git a/tests/sample-uploads/presentation.ppt b/tests/sample-uploads/presentation.ppt new file mode 100644 index 000000000..f5124ffa2 Binary files /dev/null and b/tests/sample-uploads/presentation.ppt differ diff --git a/tests/sample-uploads/presentation.pptx b/tests/sample-uploads/presentation.pptx new file mode 100644 index 000000000..21ea61a15 Binary files /dev/null and b/tests/sample-uploads/presentation.pptx differ diff --git a/tests/sample-uploads/spreadsheet.ods b/tests/sample-uploads/spreadsheet.ods new file mode 100644 index 000000000..7b43e7507 Binary files /dev/null and b/tests/sample-uploads/spreadsheet.ods differ diff --git a/tests/sample-uploads/spreadsheet.ots b/tests/sample-uploads/spreadsheet.ots new file mode 100644 index 000000000..5f830e6de Binary files /dev/null and b/tests/sample-uploads/spreadsheet.ots differ diff --git a/tests/sample-uploads/spreadsheet.xls b/tests/sample-uploads/spreadsheet.xls new file mode 100644 index 000000000..2d470e687 Binary files /dev/null and b/tests/sample-uploads/spreadsheet.xls differ diff --git a/tests/sample-uploads/spreadsheet.xlsx b/tests/sample-uploads/spreadsheet.xlsx new file mode 100644 index 000000000..b97a551f8 Binary files /dev/null and b/tests/sample-uploads/spreadsheet.xlsx differ diff --git a/tests/sample-uploads/spreadsheet.xlt b/tests/sample-uploads/spreadsheet.xlt new file mode 100644 index 000000000..980423b20 
Binary files /dev/null and b/tests/sample-uploads/spreadsheet.xlt differ diff --git a/tests/sample-uploads/wordproc.doc b/tests/sample-uploads/wordproc.doc new file mode 100644 index 000000000..81c5e34c6 Binary files /dev/null and b/tests/sample-uploads/wordproc.doc differ diff --git a/tests/sample-uploads/wordproc.docx b/tests/sample-uploads/wordproc.docx new file mode 100644 index 000000000..04ea3c3ec Binary files /dev/null and b/tests/sample-uploads/wordproc.docx differ diff --git a/tests/sample-uploads/wordproc.odt b/tests/sample-uploads/wordproc.odt new file mode 100644 index 000000000..fa6fe5e9f Binary files /dev/null and b/tests/sample-uploads/wordproc.odt differ diff --git a/tests/sample-uploads/wordproc.ott b/tests/sample-uploads/wordproc.ott new file mode 100644 index 000000000..99ca8c068 Binary files /dev/null and b/tests/sample-uploads/wordproc.ott differ diff --git a/tests/sample-uploads/wordproc.rtf b/tests/sample-uploads/wordproc.rtf new file mode 100644 index 000000000..aad2c4605 --- /dev/null +++ b/tests/sample-uploads/wordproc.rtf @@ -0,0 +1,16 @@ +{\rtf1\ansi\deff0\adeflang1025 +{\fonttbl{\f0\froman\fprq2\fcharset128 Times New Roman;}{\f1\froman\fprq2\fcharset128 Times New Roman;}{\f2\fswiss\fprq2\fcharset128 Arial;}{\f3\fnil\fprq2\fcharset128 DejaVu Sans;}} +{\colortbl;\red0\green0\blue0;\red128\green128\blue128;} +{\stylesheet{\s1\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af3\afs24\lang1081\ltrch\dbch\af3\langfe2052\hich\f0\fs24\lang1033\loch\f0\fs24\lang1033\snext1 Normal;} +{\s2\sb240\sa120\keepn\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\afs28\lang1081\ltrch\dbch\langfe2052\hich\f2\fs28\lang1033\loch\f2\fs28\lang1033\sbasedon1\snext3 Heading;} +{\s3\sa120\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af3\afs24\lang1081\ltrch\dbch\af3\langfe2052\hich\f0\fs24\lang1033\loch\f0\fs24\lang1033\sbasedon1\snext3 Body Text;} 
+{\s4\sa120\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af3\afs24\lang1081\ltrch\dbch\af3\langfe2052\hich\f0\fs24\lang1033\loch\f0\fs24\lang1033\sbasedon3\snext4 List;} +{\s5\sb120\sa120\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af3\afs24\lang1081\ai\ltrch\dbch\af3\langfe2052\hich\f0\fs24\lang1033\i\loch\f0\fs24\lang1033\i\sbasedon1\snext5 caption;} +{\s6\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af3\afs24\lang1081\ltrch\dbch\af3\langfe2052\hich\f0\fs24\lang1033\loch\f0\fs24\lang1033\sbasedon1\snext6 Index;} +} +{\info{\author Brion }{\creatim\yr2010\mo5\dy10\hr15\min2}{\revtim\yr0\mo0\dy0\hr0\min0}{\printim\yr0\mo0\dy0\hr0\min0}{\comment StarWriter}{\vern3200}}\deftab709 +{\*\pgdsctbl +{\pgdsc0\pgdscuse195\pgwsxn12240\pghsxn15840\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\pgdscnxt0 Standard;}} +\paperh15840\paperw12240\margl1134\margr1134\margt1134\margb1134\sectd\sbknone\pgwsxn12240\pghsxn15840\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\ftnbj\ftnstart1\ftnrstcont\ftnnar\aenddoc\aftnrstcont\aftnstart1\aftnnrlc +\pard\plain \ltrpar\s1\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af3\afs24\lang1081\ltrch\dbch\af3\langfe2052\hich\f0\fs24\lang1033\loch\f0\fs24\lang1033 +\par } \ No newline at end of file -- cgit v1.2.3-54-g00ecf From 9c7b66984c46668f314f93337d28c62854b6d134 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 10 May 2010 16:18:29 -0700 Subject: Enhanced upload file type detection. If given an original filename, we'll attempt to detect type from the extension if we were unable to make a definitive match from content. Generic octet-stream, zip, and MS Office type are explicitly singled out for re-checks, which fixes OpenOffice and MS Office documents to come up with the proper types when misdetected. File extensions can also be added to the upload type whitelist; they'll be normalized to types for the actual comparison, so only known extensions will work. 
--- lib/mediafile.php | 54 +++++++++++++++++++++++++++-- tests/MediaFileTest.php | 73 +++++++++++++++++++++++++--------------- tests/sample-uploads/image.gif | Bin 0 -> 35 bytes tests/sample-uploads/image.jpeg | Bin 0 -> 306 bytes tests/sample-uploads/image.jpg | Bin 0 -> 306 bytes tests/sample-uploads/image.png | Bin 0 -> 159 bytes 6 files changed, 97 insertions(+), 30 deletions(-) create mode 100644 tests/sample-uploads/image.gif create mode 100644 tests/sample-uploads/image.jpeg create mode 100644 tests/sample-uploads/image.jpg create mode 100644 tests/sample-uploads/image.png diff --git a/lib/mediafile.php b/lib/mediafile.php index 10d90d008..85d673d92 100644 --- a/lib/mediafile.php +++ b/lib/mediafile.php @@ -180,7 +180,8 @@ class MediaFile return; } - $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name']); + $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name'], + $_FILES[$param]['name']); $filename = null; @@ -241,19 +242,41 @@ class MediaFile return new MediaFile($user, $filename, $mimetype); } - static function getUploadedFileType($f) { + /** + * Attempt to identify the content type of a given file. + * + * @param mixed $f file handle resource, or filesystem path as string + * @param string $originalFilename (optional) for extension-based detection + * @return string + * + * @fixme is this an internal or public method? 
It's called from GetFileAction + * @fixme this seems to tie a front-end error message in, kinda confusing + * @fixme this looks like it could return a PEAR_Error in some cases, if + * type can't be identified and $config['attachments']['supported'] is true + * + * @throws ClientException if type is known, but not supported for local uploads + */ + static function getUploadedFileType($f, $originalFilename=false) { require_once 'MIME/Type.php'; + require_once 'MIME/Type/Extension.php'; + $mte = new MIME_Type_Extension(); $cmd = &PEAR::getStaticProperty('MIME_Type', 'fileCmd'); $cmd = common_config('attachments', 'filecommand'); $filetype = null; + // If we couldn't get a clear type from the file extension, + // we'll go ahead and try checking the content. Content checks + // are unambiguous for most image files, but nearly useless + // for office document formats. + if (is_string($f)) { // assuming a filename $filetype = MIME_Type::autoDetect($f); + } else { // assuming a filehandle @@ -262,7 +285,32 @@ class MediaFile $filetype = MIME_Type::autoDetect($stream['uri']); } - if (common_config('attachments', 'supported') === true || in_array($filetype, common_config('attachments', 'supported'))) { + // The content-based sources for MIME_Type::autoDetect() + // are wildly unreliable for office-type documents. If we've + // gotten an unclear response back or just couldn't identify it, + // we'll try detecting a type from its extension... 
+ $unclearTypes = array('application/octet-stream', + 'application/vnd.ms-office', + 'application/zip'); + + if ($originalFilename && (!$filetype || in_array($filetype, $unclearTypes))) { + $type = $mte->getMIMEType($originalFilename); + if (is_string($type)) { + $filetype = $type; + } + } + + $supported = common_config('attachments', 'supported'); + if (is_array($supported)) { + // Normalize extensions to mime types + foreach ($supported as $i => $entry) { + if (strpos($entry, '/') === false) { + common_log(LOG_INFO, "sample.$entry"); + $supported[$i] = $mte->getMIMEType("sample.$entry"); + } + } + } + if ($supported === true || in_array($filetype, $supported)) { return $filetype; } $media = MIME_Type::getMedia($filetype); diff --git a/tests/MediaFileTest.php b/tests/MediaFileTest.php index 6fe995621..a76a4f45e 100644 --- a/tests/MediaFileTest.php +++ b/tests/MediaFileTest.php @@ -34,43 +34,62 @@ class MediaFileTest extends PHPUnit_Framework_TestCase if (!file_exists($filename)) { throw new Exception("WTF? $filename test file missing"); } - $this->assertEquals($expectedType, MediaFile::getUploadedFileType($filename)); + + $type = MediaFile::getUploadedFileType($filename, basename($filename)); + $this->assertEquals($expectedType, $type); + } + + /** + * @dataProvider fileTypeCases + * + */ + public function testUploadedFileType($filename, $expectedType) + { + if (!file_exists($filename)) { + throw new Exception("WTF? 
$filename test file missing"); + } + $tmp = tmpfile(); + fwrite($tmp, file_get_contents($filename)); + + $type = MediaFile::getUploadedFileType($tmp, basename($filename)); + $this->assertEquals($expectedType, $type); } static public function fileTypeCases() { $base = dirname(__FILE__); $dir = "$base/sample-uploads"; - return array( - array("$dir/office.pdf", "application/pdf"), + $files = array( + "image.png" => "image/png", + "image.gif" => "image/gif", + "image.jpg" => "image/jpeg", + "image.jpeg" => "image/jpeg", + + "office.pdf" => "application/pdf", - array("$dir/wordproc.odt", "application/vnd.oasis.opendocument.text"), - array("$dir/wordproc.ott", "application/vnd.oasis.opendocument.text-template"), - array("$dir/wordproc.doc", "application/msword"), - array("$dir/wordproc.docx", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document"), - array("$dir/wordproc.rtf", "text/rtf"), + "wordproc.odt" => "application/vnd.oasis.opendocument.text", + "wordproc.ott" => "application/vnd.oasis.opendocument.text-template", + "wordproc.doc" => "application/msword", + "wordproc.docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "wordproc.rtf" => "text/rtf", - array("$dir/spreadsheet.ods", - "application/vnd.oasis.opendocument.spreadsheet"), - array("$dir/spreadsheet.ots", - "application/vnd.oasis.opendocument.spreadsheet-template"), - array("$dir/spreadsheet.xls", "application/vnd.ms-excel"), - array("$dir/spreadsheet.xlt", "application/vnd.ms-excel"), - array("$dir/spreadsheet.xlsx", - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), + "spreadsheet.ods" => "application/vnd.oasis.opendocument.spreadsheet", + "spreadsheet.ots" => "application/vnd.oasis.opendocument.spreadsheet-template", + "spreadsheet.xls" => "application/vnd.ms-excel", + "spreadsheet.xlt" => "application/vnd.ms-excel", + "spreadsheet.xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - 
array("$dir/presentation.odp", - "application/vnd.oasis-opendocument.presentation"), - array("$dir/presentation.otp", - "application/vnd.oasis-opendocument.presentation-template"), - array("$dir/presentation.ppt", - "application/vnd.ms-powerpoint"), - array("$dir/presentation.pot", - "application/vnd.ms-powerpoint"), - array("$dir/presentation.pptx", - "application/vnd.openxmlformats-officedocument.presentationml.presentation"), + "presentation.odp" => "application/vnd.oasis.opendocument.presentation", + "presentation.otp" => "application/vnd.oasis.opendocument.presentation-template", + "presentation.ppt" => "application/vnd.ms-powerpoint", + "presentation.pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation", ); + + $dataset = array(); + foreach ($files as $file => $type) { + $dataset[] = array("$dir/$file", $type); + } + return $dataset; } } diff --git a/tests/sample-uploads/image.gif b/tests/sample-uploads/image.gif new file mode 100644 index 000000000..b636f4b8d Binary files /dev/null and b/tests/sample-uploads/image.gif differ diff --git a/tests/sample-uploads/image.jpeg b/tests/sample-uploads/image.jpeg new file mode 100644 index 000000000..21fcb5aef Binary files /dev/null and b/tests/sample-uploads/image.jpeg differ diff --git a/tests/sample-uploads/image.jpg b/tests/sample-uploads/image.jpg new file mode 100644 index 000000000..21fcb5aef Binary files /dev/null and b/tests/sample-uploads/image.jpg differ diff --git a/tests/sample-uploads/image.png b/tests/sample-uploads/image.png new file mode 100644 index 000000000..60cbcfd17 Binary files /dev/null and b/tests/sample-uploads/image.png differ -- cgit v1.2.3-54-g00ecf