summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrion Vibber <brion@pobox.com>2010-05-10 16:18:29 -0700
committerBrion Vibber <brion@pobox.com>2010-06-28 08:59:47 -0700
commit9c7b66984c46668f314f93337d28c62854b6d134 (patch)
tree75c9c75503803f8d863a7bd04f33c2f139a37849
parent41d81b996fdd8276cc04e750297a12f852a97bf4 (diff)
Enhanced upload file type detection. If given an original filename, we'll attempt to detect type from the extension if we were unable to make a definitive match from content. Generic octet-stream, zip, and MS Office type are explicitly singled out for re-checks, which fixes OpenOffice and MS Office documents to come up with the proper types when misdetected.
File extensions can also be added to the upload type whitelist; they'll be normalized to types for the actual comparison, so only known extensions will work.
-rw-r--r--lib/mediafile.php54
-rw-r--r--tests/MediaFileTest.php73
-rw-r--r--tests/sample-uploads/image.gifbin0 -> 35 bytes
-rw-r--r--tests/sample-uploads/image.jpegbin0 -> 306 bytes
-rw-r--r--tests/sample-uploads/image.jpgbin0 -> 306 bytes
-rw-r--r--tests/sample-uploads/image.pngbin0 -> 159 bytes
6 files changed, 97 insertions, 30 deletions
diff --git a/lib/mediafile.php b/lib/mediafile.php
index 10d90d008..85d673d92 100644
--- a/lib/mediafile.php
+++ b/lib/mediafile.php
@@ -180,7 +180,8 @@ class MediaFile
return;
}
- $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name']);
+ $mimetype = MediaFile::getUploadedFileType($_FILES[$param]['tmp_name'],
+ $_FILES[$param]['name']);
$filename = null;
@@ -241,19 +242,41 @@ class MediaFile
return new MediaFile($user, $filename, $mimetype);
}
- static function getUploadedFileType($f) {
+ /**
+ * Attempt to identify the content type of a given file.
+ *
+ * @param mixed $f file handle resource, or filesystem path as string
+ * @param string $originalFilename (optional) for extension-based detection
+ * @return string
+ *
+ * @fixme is this an internal or public method? It's called from GetFileAction
+ * @fixme this seems to tie a front-end error message in, kinda confusing
+ * @fixme this looks like it could return a PEAR_Error in some cases, if
+ * type can't be identified and $config['attachments']['supported'] is true
+ *
+ * @throws ClientException if type is known, but not supported for local uploads
+ */
+ static function getUploadedFileType($f, $originalFilename=false) {
require_once 'MIME/Type.php';
+ require_once 'MIME/Type/Extension.php';
+ $mte = new MIME_Type_Extension();
$cmd = &PEAR::getStaticProperty('MIME_Type', 'fileCmd');
$cmd = common_config('attachments', 'filecommand');
$filetype = null;
+ // If we couldn't get a clear type from the file extension,
+ // we'll go ahead and try checking the content. Content checks
+ // are unambiguous for most image files, but nearly useless
+ // for office document formats.
+
if (is_string($f)) {
// assuming a filename
$filetype = MIME_Type::autoDetect($f);
+
} else {
// assuming a filehandle
@@ -262,7 +285,32 @@ class MediaFile
$filetype = MIME_Type::autoDetect($stream['uri']);
}
- if (common_config('attachments', 'supported') === true || in_array($filetype, common_config('attachments', 'supported'))) {
+ // The content-based sources for MIME_Type::autoDetect()
+ // are wildly unreliable for office-type documents. If we've
+ // gotten an unclear reponse back or just couldn't identify it,
+ // we'll try detecting a type from its extension...
+ $unclearTypes = array('application/octet-stream',
+ 'application/vnd.ms-office',
+ 'application/zip');
+
+ if ($originalFilename && (!$filetype || in_array($filetype, $unclearTypes))) {
+ $type = $mte->getMIMEType($originalFilename);
+ if (is_string($type)) {
+ $filetype = $type;
+ }
+ }
+
+ $supported = common_config('attachments', 'supported');
+ if (is_array($supported)) {
+ // Normalize extensions to mime types
+ foreach ($supported as $i => $entry) {
+ if (strpos($entry, '/') === false) {
+ common_log(LOG_INFO, "sample.$entry");
+ $supported[$i] = $mte->getMIMEType("sample.$entry");
+ }
+ }
+ }
+ if ($supported === true || in_array($filetype, $supported)) {
return $filetype;
}
$media = MIME_Type::getMedia($filetype);
diff --git a/tests/MediaFileTest.php b/tests/MediaFileTest.php
index 6fe995621..a76a4f45e 100644
--- a/tests/MediaFileTest.php
+++ b/tests/MediaFileTest.php
@@ -34,43 +34,62 @@ class MediaFileTest extends PHPUnit_Framework_TestCase
if (!file_exists($filename)) {
throw new Exception("WTF? $filename test file missing");
}
- $this->assertEquals($expectedType, MediaFile::getUploadedFileType($filename));
+
+ $type = MediaFile::getUploadedFileType($filename, basename($filename));
+ $this->assertEquals($expectedType, $type);
+ }
+
+ /**
+ * @dataProvider fileTypeCases
+ *
+ */
+ public function testUploadedFileType($filename, $expectedType)
+ {
+ if (!file_exists($filename)) {
+ throw new Exception("WTF? $filename test file missing");
+ }
+ $tmp = tmpfile();
+ fwrite($tmp, file_get_contents($filename));
+
+ $type = MediaFile::getUploadedFileType($tmp, basename($filename));
+ $this->assertEquals($expectedType, $type);
}
static public function fileTypeCases()
{
$base = dirname(__FILE__);
$dir = "$base/sample-uploads";
- return array(
- array("$dir/office.pdf", "application/pdf"),
+ $files = array(
+ "image.png" => "image/png",
+ "image.gif" => "image/gif",
+ "image.jpg" => "image/jpeg",
+ "image.jpeg" => "image/jpeg",
+
+ "office.pdf" => "application/pdf",
- array("$dir/wordproc.odt", "application/vnd.oasis.opendocument.text"),
- array("$dir/wordproc.ott", "application/vnd.oasis.opendocument.text-template"),
- array("$dir/wordproc.doc", "application/msword"),
- array("$dir/wordproc.docx",
- "application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
- array("$dir/wordproc.rtf", "text/rtf"),
+ "wordproc.odt" => "application/vnd.oasis.opendocument.text",
+ "wordproc.ott" => "application/vnd.oasis.opendocument.text-template",
+ "wordproc.doc" => "application/msword",
+ "wordproc.docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+ "wordproc.rtf" => "text/rtf",
- array("$dir/spreadsheet.ods",
- "application/vnd.oasis.opendocument.spreadsheet"),
- array("$dir/spreadsheet.ots",
- "application/vnd.oasis.opendocument.spreadsheet-template"),
- array("$dir/spreadsheet.xls", "application/vnd.ms-excel"),
- array("$dir/spreadsheet.xlt", "application/vnd.ms-excel"),
- array("$dir/spreadsheet.xlsx",
- "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
+ "spreadsheet.ods" => "application/vnd.oasis.opendocument.spreadsheet",
+ "spreadsheet.ots" => "application/vnd.oasis.opendocument.spreadsheet-template",
+ "spreadsheet.xls" => "application/vnd.ms-excel",
+ "spreadsheet.xlt" => "application/vnd.ms-excel",
+ "spreadsheet.xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
- array("$dir/presentation.odp",
- "application/vnd.oasis-opendocument.presentation"),
- array("$dir/presentation.otp",
- "application/vnd.oasis-opendocument.presentation-template"),
- array("$dir/presentation.ppt",
- "application/vnd.ms-powerpoint"),
- array("$dir/presentation.pot",
- "application/vnd.ms-powerpoint"),
- array("$dir/presentation.pptx",
- "application/vnd.openxmlformats-officedocument.presentationml.presentation"),
+ "presentation.odp" => "application/vnd.oasis.opendocument.presentation",
+ "presentation.otp" => "application/vnd.oasis.opendocument.presentation-template",
+ "presentation.ppt" => "application/vnd.ms-powerpoint",
+ "presentation.pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation",
);
+
+ $dataset = array();
+ foreach ($files as $file => $type) {
+ $dataset[] = array("$dir/$file", $type);
+ }
+ return $dataset;
}
}
diff --git a/tests/sample-uploads/image.gif b/tests/sample-uploads/image.gif
new file mode 100644
index 000000000..b636f4b8d
--- /dev/null
+++ b/tests/sample-uploads/image.gif
Binary files differ
diff --git a/tests/sample-uploads/image.jpeg b/tests/sample-uploads/image.jpeg
new file mode 100644
index 000000000..21fcb5aef
--- /dev/null
+++ b/tests/sample-uploads/image.jpeg
Binary files differ
diff --git a/tests/sample-uploads/image.jpg b/tests/sample-uploads/image.jpg
new file mode 100644
index 000000000..21fcb5aef
--- /dev/null
+++ b/tests/sample-uploads/image.jpg
Binary files differ
diff --git a/tests/sample-uploads/image.png b/tests/sample-uploads/image.png
new file mode 100644
index 000000000..60cbcfd17
--- /dev/null
+++ b/tests/sample-uploads/image.png
Binary files differ