diff options
author | Pierre Schmitz <pierre@archlinux.de> | 2011-12-03 13:29:22 +0100 |
---|---|---|
committer | Pierre Schmitz <pierre@archlinux.de> | 2011-12-03 13:29:22 +0100 |
commit | ca32f08966f1b51fcb19460f0996bb0c4048e6fe (patch) | |
tree | ec04cc15b867bc21eedca904cea9af0254531a11 /includes/media | |
parent | a22fbfc60f36f5f7ee10d5ae6fe347340c2ee67c (diff) |
Update to MediaWiki 1.18.0
* also update ArchLinux skin to changes in MonoBook
* Use only css to hide our menu bar when printing
Diffstat (limited to 'includes/media')
-rw-r--r-- | includes/media/BMP.php | 35 | ||||
-rw-r--r-- | includes/media/Bitmap.php | 363 | ||||
-rw-r--r-- | includes/media/BitmapMetadataHandler.php | 269 | ||||
-rw-r--r-- | includes/media/Bitmap_ClientOnly.php | 14 | ||||
-rw-r--r-- | includes/media/DjVu.php | 65 | ||||
-rw-r--r-- | includes/media/Exif.php | 836 | ||||
-rw-r--r-- | includes/media/ExifBitmap.php | 210 | ||||
-rw-r--r-- | includes/media/FormatMetadata.php | 1354 | ||||
-rw-r--r-- | includes/media/GIF.php | 103 | ||||
-rw-r--r-- | includes/media/GIFMetadataExtractor.php | 236 | ||||
-rw-r--r-- | includes/media/Generic.php | 302 | ||||
-rw-r--r-- | includes/media/IPTC.php | 576 | ||||
-rw-r--r-- | includes/media/Jpeg.php | 46 | ||||
-rw-r--r-- | includes/media/JpegMetadataExtractor.php | 252 | ||||
-rw-r--r-- | includes/media/MediaTransformOutput.php | 26 | ||||
-rw-r--r-- | includes/media/PNG.php | 88 | ||||
-rw-r--r-- | includes/media/PNGMetadataExtractor.php | 359 | ||||
-rw-r--r-- | includes/media/SVG.php | 92 | ||||
-rw-r--r-- | includes/media/SVGMetadataExtractor.php | 27 | ||||
-rw-r--r-- | includes/media/Tiff.php | 51 | ||||
-rw-r--r-- | includes/media/XMP.php | 1174 | ||||
-rw-r--r-- | includes/media/XMPInfo.php | 1139 | ||||
-rw-r--r-- | includes/media/XMPValidate.php | 323 |
23 files changed, 7622 insertions, 318 deletions
diff --git a/includes/media/BMP.php b/includes/media/BMP.php index de836b59..6886e950 100644 --- a/includes/media/BMP.php +++ b/includes/media/BMP.php @@ -13,22 +13,39 @@ * @ingroup Media */ class BmpHandler extends BitmapHandler { - // We never want to use .bmp in an <img/> tag + + /** + * @param $file + * @return bool + */ function mustRender( $file ) { return true; } - // Render files as PNG + /** + * Render files as PNG + * + * @param $text + * @param $mime + * @param $params + * @return array + */ function getThumbType( $text, $mime, $params = null ) { return array( 'png', 'image/png' ); } - /* + /** * Get width and height from the bmp header. + * + * @param $image + * @param $filename + * @return array */ function getImageSize( $image, $filename ) { - $f = fopen( $filename, 'r' ); - if(!$f) return false; + $f = fopen( $filename, 'rb' ); + if( !$f ) { + return false; + } $header = fread( $f, 54 ); fclose($f); @@ -37,8 +54,12 @@ class BmpHandler extends BitmapHandler { $h = substr( $header, 22, 4); // Convert the unsigned long 32 bits (little endian): - $w = unpack( 'V' , $w ); - $h = unpack( 'V' , $h ); + try { + $w = wfUnpack( 'V', $w, 4 ); + $h = wfUnpack( 'V', $h, 4 ); + } catch ( MWException $e ) { + return false; + } return array( $w[1], $h[1] ); } } diff --git a/includes/media/Bitmap.php b/includes/media/Bitmap.php index f5f7ba6d..5f796095 100644 --- a/includes/media/Bitmap.php +++ b/includes/media/Bitmap.php @@ -12,6 +12,14 @@ * @ingroup Media */ class BitmapHandler extends ImageHandler { + + /** + * @param $image File + * @param $params array Transform parameters. Entries with the keys 'width' + * and 'height' are the respective screen width and height, while the keys + * 'physicalWidth' and 'physicalHeight' indicate the thumbnail dimensions. 
+ * @return bool + */ function normaliseParams( $image, &$params ) { global $wgMaxImageArea; if ( !parent::normaliseParams( $image, $params ) ) { @@ -19,25 +27,26 @@ class BitmapHandler extends ImageHandler { } $mimeType = $image->getMimeType(); + # Obtain the source, pre-rotation dimensions $srcWidth = $image->getWidth( $params['page'] ); $srcHeight = $image->getHeight( $params['page'] ); # Don't make an image bigger than the source - $params['physicalWidth'] = $params['width']; - $params['physicalHeight'] = $params['height']; - if ( $params['physicalWidth'] >= $srcWidth ) { $params['physicalWidth'] = $srcWidth; $params['physicalHeight'] = $srcHeight; + # Skip scaling limit checks if no scaling is required - if ( !$image->mustRender() ) + # due to requested size being bigger than source. + if ( !$image->mustRender() ) { return true; + } } - + # Don't thumbnail an image so big that it will fill hard drives and send servers into swap # JPEG has the handy property of allowing thumbnailing without full decompression, so we make # an exception for it. - # FIXME: This actually only applies to ImageMagick + # @todo FIXME: This actually only applies to ImageMagick if ( $mimeType !== 'image/jpeg' && $srcWidth * $srcHeight > $wgMaxImageArea ) { @@ -46,6 +55,30 @@ class BitmapHandler extends ImageHandler { return true; } + + /** + * Extracts the width/height if the image will be scaled before rotating + * + * This will match the physical size/aspect ratio of the original image + * prior to application of the rotation -- so for a portrait image that's + * stored as raw landscape with 90-degress rotation, the resulting size + * will be wider than it is tall. 
+ * + * @param $params array Parameters as returned by normaliseParams + * @param $rotation int The rotation angle that will be applied + * @return array ($width, $height) array + */ + public function extractPreRotationDimensions( $params, $rotation ) { + if ( $rotation == 90 || $rotation == 270 ) { + # We'll resize before rotation, so swap the dimensions again + $width = $params['physicalHeight']; + $height = $params['physicalWidth']; + } else { + $width = $params['physicalWidth']; + $height = $params['physicalHeight']; + } + return array( $width, $height ); + } // Function that returns the number of pixels to be thumbnailed. @@ -54,10 +87,15 @@ class BitmapHandler extends ImageHandler { return $width * $height; } + /** + * @param $image File + * @param $dstPath + * @param $dstUrl + * @param $params + * @param int $flags + * @return MediaTransformError|ThumbnailImage|TransformParameterError + */ function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) { - global $wgUseImageMagick; - global $wgCustomConvertCommand, $wgUseImageResize; - if ( !$this->normaliseParams( $image, $params ) ) { return new TransformParameterError( $params ); } @@ -79,6 +117,7 @@ class BitmapHandler extends ImageHandler { 'mimeType' => $image->getMimeType(), 'srcPath' => $image->getPath(), 'dstPath' => $dstPath, + 'dstUrl' => $dstUrl, ); wfDebug( __METHOD__ . ": creating {$scalerParams['physicalDimensions']} thumbnail at $dstPath\n" ); @@ -93,20 +132,7 @@ class BitmapHandler extends ImageHandler { } # Determine scaler type - if ( !$dstPath ) { - # No output path available, client side scaling only - $scaler = 'client'; - } elseif ( !$wgUseImageResize ) { - $scaler = 'client'; - } elseif ( $wgUseImageMagick ) { - $scaler = 'im'; - } elseif ( $wgCustomConvertCommand ) { - $scaler = 'custom'; - } elseif ( function_exists( 'imagecreatetruecolor' ) ) { - $scaler = 'gd'; - } else { - $scaler = 'client'; - } + $scaler = self::getScalerType( $dstPath ); wfDebug( __METHOD__ . 
": scaler $scaler\n" ); if ( $scaler == 'client' ) { @@ -127,13 +153,28 @@ class BitmapHandler extends ImageHandler { return $this->getClientScalingThumbnailImage( $image, $scalerParams ); } + # Try a hook + $mto = null; + wfRunHooks( 'BitmapHandlerTransform', array( $this, $image, &$scalerParams, &$mto ) ); + if ( !is_null( $mto ) ) { + wfDebug( __METHOD__ . ": Hook to BitmapHandlerTransform created an mto\n" ); + $scaler = 'hookaborted'; + } + switch ( $scaler ) { + case 'hookaborted': + # Handled by the hook above + $err = $mto->isError() ? $mto : false; + break; case 'im': $err = $this->transformImageMagick( $image, $scalerParams ); break; case 'custom': $err = $this->transformCustom( $image, $scalerParams ); break; + case 'imext': + $err = $this->transformImageMagickExt( $image, $scalerParams ); + break; case 'gd': default: $err = $this->transformGd( $image, $scalerParams ); @@ -149,6 +190,8 @@ class BitmapHandler extends ImageHandler { # Thumbnail was zero-byte and had to be removed return new MediaTransformError( 'thumbnail_error', $scalerParams['clientWidth'], $scalerParams['clientHeight'] ); + } elseif ( $mto ) { + return $mto; } else { return new ThumbnailImage( $image, $dstUrl, $scalerParams['clientWidth'], $scalerParams['clientHeight'], $dstPath ); @@ -156,12 +199,49 @@ class BitmapHandler extends ImageHandler { } /** + * Returns which scaler type should be used. 
Creates parent directories + * for $dstPath and returns 'client' on error + * + * @return string client,im,custom,gd + */ + protected static function getScalerType( $dstPath, $checkDstPath = true ) { + global $wgUseImageResize, $wgUseImageMagick, $wgCustomConvertCommand; + + if ( !$dstPath && $checkDstPath ) { + # No output path available, client side scaling only + $scaler = 'client'; + } elseif ( !$wgUseImageResize ) { + $scaler = 'client'; + } elseif ( $wgUseImageMagick ) { + $scaler = 'im'; + } elseif ( $wgCustomConvertCommand ) { + $scaler = 'custom'; + } elseif ( function_exists( 'imagecreatetruecolor' ) ) { + $scaler = 'gd'; + } elseif ( class_exists( 'Imagick' ) ) { + $scaler = 'imext'; + } else { + $scaler = 'client'; + } + + if ( $scaler != 'client' && $dstPath ) { + if ( !wfMkdirParents( dirname( $dstPath ) ) ) { + # Unable to create a path for the thumbnail + return 'client'; + } + } + return $scaler; + } + + /** * Get a ThumbnailImage that respresents an image that will be scaled * client side * * @param $image File File associated with this thumbnail * @param $params array Array with scaler params * @return ThumbnailImage + * + * @fixme no rotation support */ protected function getClientScalingThumbnailImage( $image, $params ) { return new ThumbnailImage( $image, $image->getURL(), @@ -215,7 +295,7 @@ class BitmapHandler extends ImageHandler { // We optimize the output, but -optimize is broken, // use optimizeTransparency instead (bug 11822) if ( version_compare( $this->getMagickVersion(), "6.3.5" ) >= 0 ) { - $animation_post = '-fuzz 5% -layers optimizeTransparency +map'; + $animation_post = '-fuzz 5% -layers optimizeTransparency'; } } } @@ -225,6 +305,9 @@ class BitmapHandler extends ImageHandler { if ( strval( $wgImageMagickTempDir ) !== '' ) { $env['MAGICK_TMPDIR'] = $wgImageMagickTempDir; } + + $rotation = $this->getRotation( $image ); + list( $width, $height ) = $this->extractPreRotationDimensions( $params, $rotation ); $cmd = wfEscapeShellArg( 
$wgImageMagickConvertCommand ) . @@ -237,12 +320,13 @@ class BitmapHandler extends ImageHandler { // For the -thumbnail option a "!" is needed to force exact size, // or ImageMagick may decide your ratio is wrong and slice off // a pixel. - " -thumbnail " . wfEscapeShellArg( "{$params['physicalDimensions']}!" ) . + " -thumbnail " . wfEscapeShellArg( "{$width}x{$height}!" ) . // Add the source url as a comment to the thumb, but don't add the flag if there's no comment ( $params['comment'] !== '' ? " -set comment " . wfEscapeShellArg( $this->escapeMagickProperty( $params['comment'] ) ) : '' ) . - " -depth 8 $sharpen" . + " -depth 8 $sharpen " . + " -rotate -$rotation " . " {$animation_post} " . wfEscapeShellArg( $this->escapeMagickOutput( $params['dstPath'] ) ) . " 2>&1"; @@ -261,6 +345,84 @@ class BitmapHandler extends ImageHandler { } /** + * Transform an image using the Imagick PHP extension + * + * @param $image File File associated with this thumbnail + * @param $params array Array with scaler params + * + * @return MediaTransformError Error object if error occured, false (=no error) otherwise + */ + protected function transformImageMagickExt( $image, $params ) { + global $wgSharpenReductionThreshold, $wgSharpenParameter, $wgMaxAnimatedGifArea; + + try { + $im = new Imagick(); + $im->readImage( $params['srcPath'] ); + + if ( $params['mimeType'] == 'image/jpeg' ) { + // Sharpening, see bug 6193 + if ( ( $params['physicalWidth'] + $params['physicalHeight'] ) + / ( $params['srcWidth'] + $params['srcHeight'] ) + < $wgSharpenReductionThreshold ) { + // Hack, since $wgSharpenParamater is written specifically for the command line convert + list( $radius, $sigma ) = explode( 'x', $wgSharpenParameter ); + $im->sharpenImage( $radius, $sigma ); + } + $im->setCompressionQuality( 80 ); + } elseif( $params['mimeType'] == 'image/png' ) { + $im->setCompressionQuality( 95 ); + } elseif ( $params['mimeType'] == 'image/gif' ) { + if ( $this->getImageArea( $image, 
$params['srcWidth'], + $params['srcHeight'] ) > $wgMaxAnimatedGifArea ) { + // Extract initial frame only; we're so big it'll + // be a total drag. :P + $im->setImageScene( 0 ); + } elseif ( $this->isAnimatedImage( $image ) ) { + // Coalesce is needed to scale animated GIFs properly (bug 1017). + $im = $im->coalesceImages(); + } + } + + $rotation = $this->getRotation( $image ); + list( $width, $height ) = $this->extractPreRotationDimensions( $params, $rotation ); + + $im->setImageBackgroundColor( new ImagickPixel( 'white' ) ); + + // Call Imagick::thumbnailImage on each frame + foreach ( $im as $i => $frame ) { + if ( !$frame->thumbnailImage( $width, $height, /* fit */ false ) ) { + return $this->getMediaTransformError( $params, "Error scaling frame $i" ); + } + } + $im->setImageDepth( 8 ); + + if ( $rotation ) { + if ( !$im->rotateImage( new ImagickPixel( 'white' ), 360 - $rotation ) ) { + return $this->getMediaTransformError( $params, "Error rotating $rotation degrees" ); + } + } + + if ( $this->isAnimatedImage( $image ) ) { + wfDebug( __METHOD__ . ": Writing animated thumbnail\n" ); + // This is broken somehow... 
can't find out how to fix it + $result = $im->writeImages( $params['dstPath'], true ); + } else { + $result = $im->writeImage( $params['dstPath'] ); + } + if ( !$result ) { + return $this->getMediaTransformError( $params, + "Unable to write thumbnail to {$params['dstPath']}" ); + } + + } catch ( ImagickException $e ) { + return $this->getMediaTransformError( $params, $e->getMessage() ); + } + + return false; + + } + + /** * Transform an image using a custom command * * @param $image File File associated with this thumbnail @@ -306,12 +468,12 @@ class BitmapHandler extends ImageHandler { } /** * Get a MediaTransformError with error 'thumbnail_error' - * + * * @param $params array Parameter array as passed to the transform* functions * @param $errMsg string Error message * @return MediaTransformError */ - protected function getMediaTransformError( $params, $errMsg ) { + public function getMediaTransformError( $params, $errMsg ) { return new MediaTransformError( 'thumbnail_error', $params['clientWidth'], $params['clientHeight'], $errMsg ); } @@ -360,8 +522,10 @@ class BitmapHandler extends ImageHandler { } $src_image = call_user_func( $loader, $params['srcPath'] ); - $dst_image = imagecreatetruecolor( $params['physicalWidth'], - $params['physicalHeight'] ); + + $rotation = function_exists( 'imagerotate' ) ? $this->getRotation( $image ) : 0; + list( $width, $height ) = $this->extractPreRotationDimensions( $params, $rotation ); + $dst_image = imagecreatetruecolor( $width, $height ); // Initialise the destination image to transparent instead of // the default solid black, to support PNG and GIF transparency nicely @@ -374,15 +538,21 @@ class BitmapHandler extends ImageHandler { // It may just uglify them, and completely breaks transparency. 
imagecopyresized( $dst_image, $src_image, 0, 0, 0, 0, - $params['physicalWidth'], $params['physicalHeight'], + $width, $height, imagesx( $src_image ), imagesy( $src_image ) ); } else { imagecopyresampled( $dst_image, $src_image, 0, 0, 0, 0, - $params['physicalWidth'], $params['physicalHeight'], + $width, $height, imagesx( $src_image ), imagesy( $src_image ) ); } + if ( $rotation % 360 != 0 && $rotation % 90 == 0 ) { + $rot_image = imagerotate( $dst_image, $rotation, 0 ); + imagedestroy( $dst_image ); + $dst_image = $rot_image; + } + imagesavealpha( $dst_image, true ); call_user_func( $saveType, $dst_image, $params['dstPath'] ); @@ -508,98 +678,57 @@ class BitmapHandler extends ImageHandler { imagejpeg( $dst_image, $thumbPath, 95 ); } - - function getMetadata( $image, $filename ) { - global $wgShowEXIF; - if ( $wgShowEXIF && file_exists( $filename ) ) { - $exif = new Exif( $filename ); - $data = $exif->getFilteredData(); - if ( $data ) { - $data['MEDIAWIKI_EXIF_VERSION'] = Exif::version(); - return serialize( $data ); - } else { - return '0'; - } - } else { - return ''; - } - } - - function getMetadataType( $image ) { - return 'exif'; - } - - function isMetadataValid( $image, $metadata ) { - global $wgShowEXIF; - if ( !$wgShowEXIF ) { - # Metadata disabled and so an empty field is expected - return true; - } - if ( $metadata === '0' ) { - # Special value indicating that there is no EXIF data in the file - return true; - } - wfSuppressWarnings(); - $exif = unserialize( $metadata ); - wfRestoreWarnings(); - if ( !isset( $exif['MEDIAWIKI_EXIF_VERSION'] ) || - $exif['MEDIAWIKI_EXIF_VERSION'] != Exif::version() ) - { - # Wrong version - wfDebug( __METHOD__ . ": wrong version\n" ); - return false; - } - return true; + /** + * On supporting image formats, try to read out the low-level orientation + * of the file and return the angle that the file needs to be rotated to + * be viewed. 
+ * + * This information is only useful when manipulating the original file; + * the width and height we normally work with is logical, and will match + * any produced output views. + * + * The base BitmapHandler doesn't understand any metadata formats, so this + * is left up to child classes to implement. + * + * @param $file File + * @return int 0, 90, 180 or 270 + */ + public function getRotation( $file ) { + return 0; } /** - * Get a list of EXIF metadata items which should be displayed when - * the metadata table is collapsed. + * Returns whether the current scaler supports rotation (im and gd do) * - * @return array of strings - * @access private + * @return bool */ - function visibleMetadataFields() { - $fields = array(); - $lines = explode( "\n", wfMsgForContent( 'metadata-fields' ) ); - foreach ( $lines as $line ) { - $matches = array(); - if ( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches ) ) { - $fields[] = $matches[1]; - } + public static function canRotate() { + $scaler = self::getScalerType( null, false ); + switch ( $scaler ) { + case 'im': + # ImageMagick supports autorotation + return true; + case 'imext': + # Imagick::rotateImage + return true; + case 'gd': + # GD's imagerotate function is used to rotate images, but not + # all precompiled PHP versions have that function + return function_exists( 'imagerotate' ); + default: + # Other scalers don't support rotation + return false; } - $fields = array_map( 'strtolower', $fields ); - return $fields; } - function formatMetadata( $image ) { - $result = array( - 'visible' => array(), - 'collapsed' => array() - ); - $metadata = $image->getMetadata(); - if ( !$metadata ) { - return false; - } - $exif = unserialize( $metadata ); - if ( !$exif ) { - return false; - } - unset( $exif['MEDIAWIKI_EXIF_VERSION'] ); - $format = new FormatExif( $exif ); - - $formatted = $format->getFormattedData(); - // Sort fields into visible and collapsed - $visibleFields = $this->visibleMetadataFields(); - foreach ( 
$formatted as $name => $value ) { - $tag = strtolower( $name ); - self::addMeta( $result, - in_array( $tag, $visibleFields ) ? 'visible' : 'collapsed', - 'exif', - $tag, - $value - ); - } - return $result; + /** + * Rerurns whether the file needs to be rendered. Returns true if the + * file requires rotation and we are able to rotate it. + * + * @param $file File + * @return bool + */ + public function mustRender( $file ) { + return self::canRotate() && $this->getRotation( $file ) != 0; } } diff --git a/includes/media/BitmapMetadataHandler.php b/includes/media/BitmapMetadataHandler.php new file mode 100644 index 00000000..d1caa67a --- /dev/null +++ b/includes/media/BitmapMetadataHandler.php @@ -0,0 +1,269 @@ +<?php +/** +Class to deal with reconciling and extracting metadata from bitmap images. +This is meant to comply with http://www.metadataworkinggroup.org/pdf/mwg_guidance.pdf + +This sort of acts as an intermediary between MediaHandler::getMetadata +and the various metadata extractors. + +@todo other image formats. +*/ +class BitmapMetadataHandler { + + private $metadata = array(); + private $metaPriority = array( + 20 => array( 'other' ), + 40 => array( 'native' ), + 60 => array( 'iptc-good-hash', 'iptc-no-hash' ), + 70 => array( 'xmp-deprecated' ), + 80 => array( 'xmp-general' ), + 90 => array( 'xmp-exif' ), + 100 => array( 'iptc-bad-hash' ), + 120 => array( 'exif' ), + ); + private $iptcType = 'iptc-no-hash'; + + /** + * This does the photoshop image resource app13 block + * of interest, IPTC-IIM metadata is stored here. + * + * Mostly just calls doPSIR and doIPTC + * + * @param String $app13 String containing app13 block from jpeg file + */ + private function doApp13 ( $app13 ) { + $this->iptcType = JpegMetadataExtractor::doPSIR( $app13 ); + + $iptc = IPTC::parse( $app13 ); + $this->addMetadata( $iptc, $this->iptcType ); + } + + + /** + * Get exif info using exif class. + * Basically what used to be in BitmapHandler::getMetadata(). 
+ * Just calls stuff in the Exif class. + * + * @param $filename string + */ + function getExif ( $filename, $byteOrder ) { + global $wgShowEXIF; + if ( file_exists( $filename ) && $wgShowEXIF ) { + $exif = new Exif( $filename, $byteOrder ); + $data = $exif->getFilteredData(); + if ( $data ) { + $this->addMetadata( $data, 'exif' ); + } + } + } + /** Add misc metadata. Warning: atm if the metadata category + * doesn't have a priority, it will be silently discarded. + * + * @param Array $metaArray array of metadata values + * @param string $type type. defaults to other. if two things have the same type they're merged + */ + function addMetadata ( $metaArray, $type = 'other' ) { + if ( isset( $this->metadata[$type] ) ) { + /* merge with old data */ + $metaArray = $metaArray + $this->metadata[$type]; + } + + $this->metadata[$type] = $metaArray; + } + + /** + * Merge together the various types of metadata + * the different types have different priorites, + * and are merged in order. + * + * This function is generally called by the media handlers' getMetadata() + * + * @return Array metadata array + */ + function getMetadataArray () { + // this seems a bit ugly... This is all so its merged in right order + // based on the MWG recomendation. + $temp = Array(); + krsort( $this->metaPriority ); + foreach ( $this->metaPriority as $pri ) { + foreach ( $pri as $type ) { + if ( isset( $this->metadata[$type] ) ) { + // Do some special casing for multilingual values. + // Don't discard translations if also as a simple value. 
+ foreach ( $this->metadata[$type] as $itemName => $item ) { + if ( is_array( $item ) && isset( $item['_type'] ) && $item['_type'] === 'lang' ) { + if ( isset( $temp[$itemName] ) && !is_array( $temp[$itemName] ) ) { + $default = $temp[$itemName]; + $temp[$itemName] = $item; + $temp[$itemName]['x-default'] = $default; + unset( $this->metadata[$type][$itemName] ); + } + } + } + + $temp = $temp + $this->metadata[$type]; + } + } + } + return $temp; + } + + /** Main entry point for jpeg's. + * + * @param $filename string filename (with full path) + * @return metadata result array. + * @throws MWException on invalid file. + */ + static function Jpeg ( $filename ) { + $showXMP = function_exists( 'xml_parser_create_ns' ); + $meta = new self(); + + $seg = JpegMetadataExtractor::segmentSplitter( $filename ); + if ( isset( $seg['COM'] ) && isset( $seg['COM'][0] ) ) { + $meta->addMetadata( Array( 'JPEGFileComment' => $seg['COM'] ), 'native' ); + } + if ( isset( $seg['PSIR'] ) ) { + $meta->doApp13( $seg['PSIR'] ); + } + if ( isset( $seg['XMP'] ) && $showXMP ) { + $xmp = new XMPReader(); + $xmp->parse( $seg['XMP'] ); + foreach ( $seg['XMP_ext'] as $xmpExt ) { + /* Support for extended xmp in jpeg files + * is not well tested and a bit fragile. + */ + $xmp->parseExtended( $xmpExt ); + + } + $res = $xmp->getResults(); + foreach ( $res as $type => $array ) { + $meta->addMetadata( $array, $type ); + } + } + if ( isset( $seg['byteOrder'] ) ) { + $meta->getExif( $filename, $seg['byteOrder'] ); + } + return $meta->getMetadataArray(); + } + + /** Entry point for png + * At some point in the future this might + * merge the png various tEXt chunks to that + * are interesting, but for now it only does XMP + * + * @param $filename String full path to file + * @return Array Array for storage in img_metadata. 
+ */ + static public function PNG ( $filename ) { + $showXMP = function_exists( 'xml_parser_create_ns' ); + + $meta = new self(); + $array = PNGMetadataExtractor::getMetadata( $filename ); + if ( isset( $array['text']['xmp']['x-default'] ) && $array['text']['xmp']['x-default'] !== '' && $showXMP ) { + $xmp = new XMPReader(); + $xmp->parse( $array['text']['xmp']['x-default'] ); + $xmpRes = $xmp->getResults(); + foreach ( $xmpRes as $type => $xmpSection ) { + $meta->addMetadata( $xmpSection, $type ); + } + } + unset( $array['text']['xmp'] ); + $meta->addMetadata( $array['text'], 'native' ); + unset( $array['text'] ); + $array['metadata'] = $meta->getMetadataArray(); + $array['metadata']['_MW_PNG_VERSION'] = PNGMetadataExtractor::VERSION; + return $array; + } + + /** function for gif images. + * + * They don't really have native metadata, so just merges together + * XMP and image comment. + * + * @param $filename full path to file + * @return Array metadata array + */ + static public function GIF ( $filename ) { + + $meta = new self(); + $baseArray = GIFMetadataExtractor::getMetadata( $filename ); + + if ( count( $baseArray['comment'] ) > 0 ) { + $meta->addMetadata( array( 'GIFFileComment' => $baseArray['comment'] ), 'native' ); + } + + if ( $baseArray['xmp'] !== '' && function_exists( 'xml_parser_create_ns' ) ) { + $xmp = new XMPReader(); + $xmp->parse( $baseArray['xmp'] ); + $xmpRes = $xmp->getResults(); + foreach ( $xmpRes as $type => $xmpSection ) { + $meta->addMetadata( $xmpSection, $type ); + } + + } + + unset( $baseArray['comment'] ); + unset( $baseArray['xmp'] ); + + $baseArray['metadata'] = $meta->getMetadataArray(); + $baseArray['metadata']['_MW_GIF_VERSION'] = GIFMetadataExtractor::VERSION; + return $baseArray; + } + + /** + * This doesn't do much yet, but eventually I plan to add + * XMP support for Tiff. (PHP's exif support already extracts + * but needs some further processing because PHP's exif support + * is stupid...) 
+ * + * @todo Add XMP support, so this function actually makes + * sense to put here. + * + * The various exceptions this throws are caught later. + * @param $filename String + * @return Array The metadata. + */ + static public function Tiff ( $filename ) { + if ( file_exists( $filename ) ) { + $byteOrder = self::getTiffByteOrder( $filename ); + if ( !$byteOrder ) { + throw new MWException( "Error determining byte order of $filename" ); + } + $exif = new Exif( $filename, $byteOrder ); + $data = $exif->getFilteredData(); + if ( $data ) { + $data['MEDIAWIKI_EXIF_VERSION'] = Exif::version(); + return $data; + } else { + throw new MWException( "Could not extract data from tiff file $filename" ); + } + } else { + throw new MWException( "File doesn't exist - $filename" ); + } + } + /** + * Read the first 2 bytes of a tiff file to figure out + * Little Endian or Big Endian. Needed for exif stuff. + * + * @param $filename String The filename + * @return String 'BE' or 'LE' or false + */ + static function getTiffByteOrder( $filename ) { + $fh = fopen( $filename, 'rb' ); + if ( !$fh ) return false; + $head = fread( $fh, 2 ); + fclose( $fh ); + + switch( $head ) { + case 'II': + return 'LE'; // II for intel. + case 'MM': + return 'BE'; // MM for motorla. + default: + return false; // Something went wrong. 
+ + } + } + + +} diff --git a/includes/media/Bitmap_ClientOnly.php b/includes/media/Bitmap_ClientOnly.php index 9f6f7b33..50679229 100644 --- a/includes/media/Bitmap_ClientOnly.php +++ b/includes/media/Bitmap_ClientOnly.php @@ -15,10 +15,24 @@ * @ingroup Media */ class BitmapHandler_ClientOnly extends BitmapHandler { + + /** + * @param $image File + * @param $params + * @return bool + */ function normaliseParams( $image, &$params ) { return ImageHandler::normaliseParams( $image, $params ); } + /** + * @param $image File + * @param $dstPath + * @param $dstUrl + * @param $params + * @param int $flags + * @return ThumbnailImage|TransformParameterError + */ function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) { if ( !$this->normaliseParams( $image, $params ) ) { return new TransformParameterError( $params ); diff --git a/includes/media/DjVu.php b/includes/media/DjVu.php index cc3f1db5..2833f683 100644 --- a/includes/media/DjVu.php +++ b/includes/media/DjVu.php @@ -5,13 +5,17 @@ * @file * @ingroup Media */ - + /** * Handler for DjVu images * * @ingroup Media */ class DjVuHandler extends ImageHandler { + + /** + * @return bool + */ function isEnabled() { global $wgDjvuRenderer, $wgDjvuDump, $wgDjvuToXML; if ( !$wgDjvuRenderer || ( !$wgDjvuDump && !$wgDjvuToXML ) ) { @@ -22,9 +26,25 @@ class DjVuHandler extends ImageHandler { } } - function mustRender( $file ) { return true; } - function isMultiPage( $file ) { return true; } + /** + * @param $file + * @return bool + */ + function mustRender( $file ) { + return true; + } + + /** + * @param $file + * @return bool + */ + function isMultiPage( $file ) { + return true; + } + /** + * @return array + */ function getParamMap() { return array( 'img_width' => 'width', @@ -32,6 +52,11 @@ class DjVuHandler extends ImageHandler { ); } + /** + * @param $name + * @param $value + * @return bool + */ function validateParam( $name, $value ) { if ( in_array( $name, array( 'width', 'height', 'page' ) ) ) { if ( $value <= 0 
) { @@ -44,6 +69,10 @@ class DjVuHandler extends ImageHandler { } } + /** + * @param $params + * @return bool|string + */ function makeParamString( $params ) { $page = isset( $params['page'] ) ? $params['page'] : 1; if ( !isset( $params['width'] ) ) { @@ -52,6 +81,10 @@ class DjVuHandler extends ImageHandler { return "page{$page}-{$params['width']}px"; } + /** + * @param $str + * @return array|bool + */ function parseParamString( $str ) { $m = false; if ( preg_match( '/^page(\d+)-(\d+)px$/', $str, $m ) ) { @@ -61,6 +94,10 @@ class DjVuHandler extends ImageHandler { } } + /** + * @param $params + * @return array + */ function getScriptParams( $params ) { return array( 'width' => $params['width'], @@ -68,6 +105,14 @@ class DjVuHandler extends ImageHandler { ); } + /** + * @param $image File + * @param $dstPath + * @param $dstUrl + * @param $params + * @param int $flags + * @return MediaTransformError|ThumbnailImage|TransformParameterError + */ function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) { global $wgDjvuRenderer, $wgDjvuPostProcessor; @@ -75,7 +120,9 @@ class DjVuHandler extends ImageHandler { // normaliseParams will inevitably give. $xml = $image->getMetadata(); if ( !$xml ) { - return new MediaTransformError( 'thumbnail_error', @$params['width'], @$params['height'], + $width = isset( $params['width'] ) ? $params['width'] : 0; + $height = isset( $params['height'] ) ? $params['height'] : 0; + return new MediaTransformError( 'thumbnail_error', $width, $height, wfMsg( 'djvu_no_xml' ) ); } @@ -100,7 +147,8 @@ class DjVuHandler extends ImageHandler { # Use a subshell (brackets) to aggregate stderr from both pipeline commands # before redirecting it to the overall stdout. This works in both Linux and Windows XP. - $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page} -size={$width}x{$height} " . + $cmd = '(' . wfEscapeShellArg( $wgDjvuRenderer ) . " -format=ppm -page={$page}" . 
+ " -size={$params['physicalWidth']}x{$params['physicalHeight']} " . wfEscapeShellArg( $srcPath ); if ( $wgDjvuPostProcessor ) { $cmd .= " | {$wgDjvuPostProcessor}"; @@ -125,6 +173,8 @@ class DjVuHandler extends ImageHandler { /** * Cache an instance of DjVuImage in an Image object, return that instance + * + * @return DjVuImage */ function getDjVuImage( $image, $path ) { if ( !$image ) { @@ -139,6 +189,7 @@ class DjVuHandler extends ImageHandler { /** * Cache a document tree for the DjVu XML metadata + * @param $image File */ function getMetaTree( $image , $gettext = false ) { if ( isset( $image->dejaMetaTree ) ) { @@ -159,11 +210,11 @@ class DjVuHandler extends ImageHandler { $image->djvuTextTree = false; $tree = new SimpleXMLElement( $metadata ); if( $tree->getName() == 'mw-djvu' ) { - foreach($tree->children() as $b){ + foreach($tree->children() as $b){ if( $b->getName() == 'DjVuTxt' ) { $image->djvuTextTree = $b; } - else if ( $b->getName() == 'DjVuXML' ) { + elseif ( $b->getName() == 'DjVuXML' ) { $image->dejaMetaTree = $b; } } diff --git a/includes/media/Exif.php b/includes/media/Exif.php new file mode 100644 index 00000000..345a6f19 --- /dev/null +++ b/includes/media/Exif.php @@ -0,0 +1,836 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @ingroup Media + * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> + * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber + * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License + * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification + * @file + */ + +/** + * Class to extract and validate Exif data from jpeg (and possibly tiff) files. + * @ingroup Media + */ +class Exif { + + const BYTE = 1; //!< An 8-bit (1-byte) unsigned integer. + const ASCII = 2; //!< An 8-bit byte containing one 7-bit ASCII code. The final byte is terminated with NULL. + const SHORT = 3; //!< A 16-bit (2-byte) unsigned integer. + const LONG = 4; //!< A 32-bit (4-byte) unsigned integer. + const RATIONAL = 5; //!< Two LONGs. The first LONG is the numerator and the second LONG expresses the denominator + const UNDEFINED = 7; //!< An 8-bit byte that can take any value depending on the field definition + const SLONG = 9; //!< A 32-bit (4-byte) signed integer (2's complement notation), + const SRATIONAL = 10; //!< Two SLONGs. The first SLONG is the numerator and the second SLONG is the denominator. + const IGNORE = -1; // A fake value for things we don't want or don't support. + + //@{ + /* @var array + * @private + */ + + /** + * Exif tags grouped by category, the tagname itself is the key and the type + * is the value, in the case of more than one possible value type they are + * separated by commas. 
+ */ + var $mExifTags; + + /** + * The raw Exif data returned by exif_read_data() + */ + var $mRawExifData; + + /** + * A Filtered version of $mRawExifData that has been pruned of invalid + * tags and tags that contain content they shouldn't contain according + * to the Exif specification + */ + var $mFilteredExifData; + + /** + * Filtered and formatted Exif data, see FormatMetadata::getFormattedData() + */ + var $mFormattedExifData; + + //@} + + //@{ + /* @var string + * @private + */ + + /** + * The file being processed + */ + var $file; + + /** + * The basename of the file being processed + */ + var $basename; + + /** + * The private log to log to, e.g. 'exif' + */ + var $log = false; + + /** + * The byte order of the file. Needed because php's + * extension doesn't fully process some obscure props. + */ + private $byteOrder; + //@} + + /** + * Constructor + * + * @param $file String: filename. + * @todo FIXME: The following are broke: + * SubjectArea. Need to test the more obscure tags. + * + * DigitalZoomRatio = 0/0 is rejected. need to determine if that's valid. + * possibly should treat 0/0 = 0. need to read exif spec on that. + */ + function __construct( $file, $byteOrder = '' ) { + /** + * Page numbers here refer to pages in the EXIF 2.2 standard + * + * Note, Exif::UNDEFINED is treated as a string, not as an array of bytes + * so don't put a count parameter for any UNDEFINED values. + * + * @link http://exif.org/Exif2-2.PDF The Exif 2.2 specification + */ + $this->mExifTags = array( + # TIFF Rev. 6.0 Attribute Information (p22) + 'IFD0' => array( + # Tags relating to image structure + 'ImageWidth' => Exif::SHORT.','.Exif::LONG, # Image width + 'ImageLength' => Exif::SHORT.','.Exif::LONG, # Image height + 'BitsPerSample' => array( Exif::SHORT, 3 ), # Number of bits per component + # "When a primary image is JPEG compressed, this designation is not" + # "necessary and is omitted." 
(p23) + 'Compression' => Exif::SHORT, # Compression scheme #p23 + 'PhotometricInterpretation' => Exif::SHORT, # Pixel composition #p23 + 'Orientation' => Exif::SHORT, # Orientation of image #p24 + 'SamplesPerPixel' => Exif::SHORT, # Number of components + 'PlanarConfiguration' => Exif::SHORT, # Image data arrangement #p24 + 'YCbCrSubSampling' => array( Exif::SHORT, 2), # Subsampling ratio of Y to C #p24 + 'YCbCrPositioning' => Exif::SHORT, # Y and C positioning #p24-25 + 'XResolution' => Exif::RATIONAL, # Image resolution in width direction + 'YResolution' => Exif::RATIONAL, # Image resolution in height direction + 'ResolutionUnit' => Exif::SHORT, # Unit of X and Y resolution #(p26) + + # Tags relating to recording offset + 'StripOffsets' => Exif::SHORT.','.Exif::LONG, # Image data location + 'RowsPerStrip' => Exif::SHORT.','.Exif::LONG, # Number of rows per strip + 'StripByteCounts' => Exif::SHORT.','.Exif::LONG, # Bytes per compressed strip + 'JPEGInterchangeFormat' => Exif::SHORT.','.Exif::LONG, # Offset to JPEG SOI + 'JPEGInterchangeFormatLength' => Exif::SHORT.','.Exif::LONG, # Bytes of JPEG data + + # Tags relating to image data characteristics + 'TransferFunction' => Exif::IGNORE, # Transfer function + 'WhitePoint' => array( Exif::RATIONAL, 2), # White point chromaticity + 'PrimaryChromaticities' => array( Exif::RATIONAL, 6), # Chromaticities of primarities + 'YCbCrCoefficients' => array( Exif::RATIONAL, 3), # Color space transformation matrix coefficients #p27 + 'ReferenceBlackWhite' => array( Exif::RATIONAL, 6), # Pair of black and white reference values + + # Other tags + 'DateTime' => Exif::ASCII, # File change date and time + 'ImageDescription' => Exif::ASCII, # Image title + 'Make' => Exif::ASCII, # Image input equipment manufacturer + 'Model' => Exif::ASCII, # Image input equipment model + 'Software' => Exif::ASCII, # Software used + 'Artist' => Exif::ASCII, # Person who created the image + 'Copyright' => Exif::ASCII, # Copyright holder + ), + + # 
Exif IFD Attribute Information (p30-31) + 'EXIF' => array( + # TODO: NOTE: Nonexistence of this field is taken to mean nonconformance + # to the EXIF 2.1 AND 2.2 standards + 'ExifVersion' => Exif::UNDEFINED, # Exif version + 'FlashPixVersion' => Exif::UNDEFINED, # Supported Flashpix version #p32 + + # Tags relating to Image Data Characteristics + 'ColorSpace' => Exif::SHORT, # Color space information #p32 + + # Tags relating to image configuration + 'ComponentsConfiguration' => Exif::UNDEFINED, # Meaning of each component #p33 + 'CompressedBitsPerPixel' => Exif::RATIONAL, # Image compression mode + 'PixelYDimension' => Exif::SHORT.','.Exif::LONG, # Valid image height + 'PixelXDimension' => Exif::SHORT.','.Exif::LONG, # Valid image width + + # Tags relating to related user information + 'MakerNote' => Exif::IGNORE, # Manufacturer notes + 'UserComment' => Exif::UNDEFINED, # User comments #p34 + + # Tags relating to related file information + 'RelatedSoundFile' => Exif::ASCII, # Related audio file + + # Tags relating to date and time + 'DateTimeOriginal' => Exif::ASCII, # Date and time of original data generation #p36 + 'DateTimeDigitized' => Exif::ASCII, # Date and time of digital data generation + 'SubSecTime' => Exif::ASCII, # DateTime subseconds + 'SubSecTimeOriginal' => Exif::ASCII, # DateTimeOriginal subseconds + 'SubSecTimeDigitized' => Exif::ASCII, # DateTimeDigitized subseconds + + # Tags relating to picture-taking conditions (p31) + 'ExposureTime' => Exif::RATIONAL, # Exposure time + 'FNumber' => Exif::RATIONAL, # F Number + 'ExposureProgram' => Exif::SHORT, # Exposure Program #p38 + 'SpectralSensitivity' => Exif::ASCII, # Spectral sensitivity + 'ISOSpeedRatings' => Exif::SHORT, # ISO speed rating + 'OECF' => Exif::IGNORE, + # Optoelectronic conversion factor. Note: We don't have support for this atm. 
+ 'ShutterSpeedValue' => Exif::SRATIONAL, # Shutter speed + 'ApertureValue' => Exif::RATIONAL, # Aperture + 'BrightnessValue' => Exif::SRATIONAL, # Brightness + 'ExposureBiasValue' => Exif::SRATIONAL, # Exposure bias + 'MaxApertureValue' => Exif::RATIONAL, # Maximum lens aperture + 'SubjectDistance' => Exif::RATIONAL, # Subject distance + 'MeteringMode' => Exif::SHORT, # Metering mode #p40 + 'LightSource' => Exif::SHORT, # Light source #p40-41 + 'Flash' => Exif::SHORT, # Flash #p41-42 + 'FocalLength' => Exif::RATIONAL, # Lens focal length + 'SubjectArea' => array( Exif::SHORT, 4 ), # Subject area + 'FlashEnergy' => Exif::RATIONAL, # Flash energy + 'SpatialFrequencyResponse' => Exif::IGNORE, # Spatial frequency response. Not supported atm. + 'FocalPlaneXResolution' => Exif::RATIONAL, # Focal plane X resolution + 'FocalPlaneYResolution' => Exif::RATIONAL, # Focal plane Y resolution + 'FocalPlaneResolutionUnit' => Exif::SHORT, # Focal plane resolution unit #p46 + 'SubjectLocation' => array( Exif::SHORT, 2), # Subject location + 'ExposureIndex' => Exif::RATIONAL, # Exposure index + 'SensingMethod' => Exif::SHORT, # Sensing method #p46 + 'FileSource' => Exif::UNDEFINED, # File source #p47 + 'SceneType' => Exif::UNDEFINED, # Scene type #p47 + 'CFAPattern' => Exif::IGNORE, # CFA pattern. not supported atm. + 'CustomRendered' => Exif::SHORT, # Custom image processing #p48 + 'ExposureMode' => Exif::SHORT, # Exposure mode #p48 + 'WhiteBalance' => Exif::SHORT, # White Balance #p49 + 'DigitalZoomRatio' => Exif::RATIONAL, # Digital zoom ratio + 'FocalLengthIn35mmFilm' => Exif::SHORT, # Focal length in 35 mm film + 'SceneCaptureType' => Exif::SHORT, # Scene capture type #p49 + 'GainControl' => Exif::SHORT, # Scene control #p49-50 + 'Contrast' => Exif::SHORT, # Contrast #p50 + 'Saturation' => Exif::SHORT, # Saturation #p50 + 'Sharpness' => Exif::SHORT, # Sharpness #p50 + 'DeviceSettingDescription' => Exif::IGNORE, + # Device settings description. This could maybe be supported. 
Need to find an + # example file that uses this to see if it has stuff of interest in it. + 'SubjectDistanceRange' => Exif::SHORT, # Subject distance range #p51 + + 'ImageUniqueID' => Exif::ASCII, # Unique image ID + ), + + # GPS Attribute Information (p52) + 'GPS' => array( + 'GPSVersion' => Exif::UNDEFINED, + # Should be an array of 4 Exif::BYTE's. However php treats it as an undefined + # Note exif standard calls this GPSVersionID, but php doesn't like the id suffix + 'GPSLatitudeRef' => Exif::ASCII, # North or South Latitude #p52-53 + 'GPSLatitude' => array( Exif::RATIONAL, 3 ), # Latitude + 'GPSLongitudeRef' => Exif::ASCII, # East or West Longitude #p53 + 'GPSLongitude' => array( Exif::RATIONAL, 3), # Longitude + 'GPSAltitudeRef' => Exif::UNDEFINED, + # Altitude reference. Note, the exif standard says this should be an EXIF::Byte, + # but php seems to disagree. + 'GPSAltitude' => Exif::RATIONAL, # Altitude + 'GPSTimeStamp' => array( Exif::RATIONAL, 3), # GPS time (atomic clock) + 'GPSSatellites' => Exif::ASCII, # Satellites used for measurement + 'GPSStatus' => Exif::ASCII, # Receiver status #p54 + 'GPSMeasureMode' => Exif::ASCII, # Measurement mode #p54-55 + 'GPSDOP' => Exif::RATIONAL, # Measurement precision + 'GPSSpeedRef' => Exif::ASCII, # Speed unit #p55 + 'GPSSpeed' => Exif::RATIONAL, # Speed of GPS receiver + 'GPSTrackRef' => Exif::ASCII, # Reference for direction of movement #p55 + 'GPSTrack' => Exif::RATIONAL, # Direction of movement + 'GPSImgDirectionRef' => Exif::ASCII, # Reference for direction of image #p56 + 'GPSImgDirection' => Exif::RATIONAL, # Direction of image + 'GPSMapDatum' => Exif::ASCII, # Geodetic survey data used + 'GPSDestLatitudeRef' => Exif::ASCII, # Reference for latitude of destination #p56 + 'GPSDestLatitude' => array( Exif::RATIONAL, 3 ), # Latitude destination + 'GPSDestLongitudeRef' => Exif::ASCII, # Reference for longitude of destination #p57 + 'GPSDestLongitude' => array( Exif::RATIONAL, 3 ), # Longitude of destination + 
'GPSDestBearingRef' => Exif::ASCII, # Reference for bearing of destination #p57 + 'GPSDestBearing' => Exif::RATIONAL, # Bearing of destination + 'GPSDestDistanceRef' => Exif::ASCII, # Reference for distance to destination #p57-58 + 'GPSDestDistance' => Exif::RATIONAL, # Distance to destination + 'GPSProcessingMethod' => Exif::UNDEFINED, # Name of GPS processing method + 'GPSAreaInformation' => Exif::UNDEFINED, # Name of GPS area + 'GPSDateStamp' => Exif::ASCII, # GPS date + 'GPSDifferential' => Exif::SHORT, # GPS differential correction + ), + ); + + $this->file = $file; + $this->basename = wfBaseName( $this->file ); + if ( $byteOrder === 'BE' || $byteOrder === 'LE' ) { + $this->byteOrder = $byteOrder; + } else { + // Only give a warning for b/c, since originally we didn't + // require this. The number of things affected by this is + // rather small. + wfWarn( 'Exif class did not have byte order specified. ' + . 'Some properties may be decoded incorrectly.' ); + $this->byteOrder = 'BE'; // BE seems about twice as popular as LE in jpg's. + } + + // debugFile() takes ( $fname, $io ) and reads the basename from $this->basename itself. + $this->debugFile( __FUNCTION__, true ); + if( function_exists( 'exif_read_data' ) ) { + wfSuppressWarnings(); + $data = exif_read_data( $this->file, 0, true ); + wfRestoreWarnings(); + } else { + throw new MWException( "Internal error: exif_read_data not present. \$wgShowEXIF may be incorrectly set or not checked by an extension." ); + } + /** + * exif_read_data() will return false on invalid input, such as + * when somebody uploads a file called something.jpeg + * containing random gibberish. + */ + $this->mRawExifData = $data ? 
$data : array(); + $this->makeFilteredData(); + $this->collapseData(); + $this->debugFile( __FUNCTION__, false ); + } + + /** + * Make $this->mFilteredExifData + */ + function makeFilteredData() { + $this->mFilteredExifData = Array(); + + foreach ( array_keys( $this->mRawExifData ) as $section ) { + if ( !in_array( $section, array_keys( $this->mExifTags ) ) ) { + $this->debug( $section , __FUNCTION__, "'$section' is not a valid Exif section" ); + continue; + } + + foreach ( array_keys( $this->mRawExifData[$section] ) as $tag ) { + if ( !in_array( $tag, array_keys( $this->mExifTags[$section] ) ) ) { + $this->debug( $tag, __FUNCTION__, "'$tag' is not a valid tag in '$section'" ); + continue; + } + + $this->mFilteredExifData[$tag] = $this->mRawExifData[$section][$tag]; + // This is ok, as the tags in the different sections do not conflict. + // except in computed and thumbnail section, which we don't use. + + $value = $this->mRawExifData[$section][$tag]; + if ( !$this->validate( $section, $tag, $value ) ) { + $this->debug( $value, __FUNCTION__, "'$tag' contained invalid data" ); + unset( $this->mFilteredExifData[$tag] ); + } + } + } + } + + /** + * Collapse some fields together. + * This converts some fields from exif form, to a more friendly form. + * For example GPS latitude to a single number. + * + * The rationale behind this is that we're storing data, not presenting to the user + * For example a longitude is a single number describing how far away you are from + * the prime meridian. Well it might be nice to split it up into minutes and seconds + * for the user, it doesn't really make sense to split a single number into 4 parts + * for storage. (degrees, minutes, second, direction vs single floating point number). + * + * Other things this might do (not really sure if they make sense or not): + * Dates -> mediawiki date format. + * convert values that can be in different units to be in one standardized unit. 
+ * + * As an alternative approach, some of this could be done in the validate phase + * if we make up our own types like Exif::DATE. + * + * GPSAltitude is converted from its validated "num/denom" form to a plain number, + * negative when GPSAltitudeRef is "\1"; GPSAltitudeRef is then dropped. + */ + function collapseData( ) { + + $this->exifGPStoNumber( 'GPSLatitude' ); + $this->exifGPStoNumber( 'GPSDestLatitude' ); + $this->exifGPStoNumber( 'GPSLongitude' ); + $this->exifGPStoNumber( 'GPSDestLongitude' ); + + if ( isset( $this->mFilteredExifData['GPSAltitude'] ) && isset( $this->mFilteredExifData['GPSAltitudeRef'] ) ) { + // The altitude passed isRational(), so it is a "num/denom" string with a + // non-zero denominator. Multiplying that string by -1 would only see the + // numerator, so turn it into a number first. + list( $num, $denom ) = explode( '/', $this->mFilteredExifData['GPSAltitude'] ); + $this->mFilteredExifData['GPSAltitude'] = $num / $denom; + + if ( $this->mFilteredExifData['GPSAltitudeRef'] === "\1" ) { + $this->mFilteredExifData['GPSAltitude'] *= - 1; + } + unset( $this->mFilteredExifData['GPSAltitudeRef'] ); + } + + $this->exifPropToOrd( 'FileSource' ); + $this->exifPropToOrd( 'SceneType' ); + + $this->charCodeString( 'UserComment' ); + $this->charCodeString( 'GPSProcessingMethod'); + $this->charCodeString( 'GPSAreaInformation' ); + + //ComponentsConfiguration should really be an array instead of a string... + //This turns a string of binary numbers into an array of numbers. + + if ( isset ( $this->mFilteredExifData['ComponentsConfiguration'] ) ) { + $val = $this->mFilteredExifData['ComponentsConfiguration']; + $ccVals = array(); + for ($i = 0; $i < strlen($val); $i++) { + $ccVals[$i] = ord( substr($val, $i, 1) ); + } + $ccVals['_type'] = 'ol'; //this is for formatting later. + $this->mFilteredExifData['ComponentsConfiguration'] = $ccVals; + } + + //GPSVersion(ID) is treated as the wrong type by php exif support. + //Go through each byte turning it into a version string. + //For example: "\x02\x02\x00\x00" -> "2.2.0.0" + + //Also change exif tag name from GPSVersion (what php exif thinks it is) + //to GPSVersionID (what the exif standard thinks it is). 
+ + if ( isset ( $this->mFilteredExifData['GPSVersion'] ) ) { + $val = $this->mFilteredExifData['GPSVersion']; + $newVal = ''; + for ($i = 0; $i < strlen($val); $i++) { + if ( $i !== 0 ) { + $newVal .= '.'; + } + $newVal .= ord( substr($val, $i, 1) ); + } + if ( $this->byteOrder === 'LE' ) { + // Need to reverse the string + $newVal2 = ''; + for ( $i = strlen( $newVal ) - 1; $i >= 0; $i-- ) { + $newVal2 .= substr( $newVal, $i, 1 ); + } + $this->mFilteredExifData['GPSVersionID'] = $newVal2; + } else { + $this->mFilteredExifData['GPSVersionID'] = $newVal; + } + unset( $this->mFilteredExifData['GPSVersion'] ); + } + + } + /** + * Do userComment tags and similar. See pg. 34 of exif standard. + * basically first 8 bytes is charset, rest is value. + * This has not been tested on any shift-JIS strings. + * @param $prop String prop name. + */ + private function charCodeString ( $prop ) { + if ( isset( $this->mFilteredExifData[$prop] ) ) { + + if ( strlen($this->mFilteredExifData[$prop]) <= 8 ) { + //invalid. Must be at least 9 bytes long. + + $this->debug( $this->mFilteredExifData[$prop] , __FUNCTION__, false ); + unset($this->mFilteredExifData[$prop]); + return; + } + $charCode = substr( $this->mFilteredExifData[$prop], 0, 8); + $val = substr( $this->mFilteredExifData[$prop], 8); + + + switch ($charCode) { + case "\x4A\x49\x53\x00\x00\x00\x00\x00": + //JIS + $charset = "Shift-JIS"; + break; + case "UNICODE\x00": + $charset = "UTF-16" . $this->byteOrder; + break; + default: //ascii or undefined. + $charset = ""; + break; + } + // This could possibly check to see if iconv is really installed + // or if we're using the compatibility wrapper in globalFunctions.php + if ($charset) { + wfSuppressWarnings(); + $val = iconv($charset, 'UTF-8//IGNORE', $val); + wfRestoreWarnings(); + } else { + // if valid utf-8, assume that, otherwise assume windows-1252 + $valCopy = $val; + UtfNormal::quickIsNFCVerify( $valCopy ); //validates $valCopy. 
+ if ( $valCopy !== $val ) { + wfSuppressWarnings(); + $val = iconv('Windows-1252', 'UTF-8//IGNORE', $val); + wfRestoreWarnings(); + } + } + + //trim and check to make sure not only whitespace. + $val = trim($val); + if ( strlen( $val ) === 0 ) { + //only whitespace. + $this->debug( $this->mFilteredExifData[$prop] , __FUNCTION__, "$prop: Is only whitespace" ); + unset($this->mFilteredExifData[$prop]); + return; + } + + //all's good. + $this->mFilteredExifData[$prop] = $val; + } + } + /** + * Convert an Exif::UNDEFINED from a raw binary string + * to its value. This is sometimes needed depending on + * the type of UNDEFINED field + * @param $prop String name of property + */ + private function exifPropToOrd ( $prop ) { + if ( isset( $this->mFilteredExifData[$prop] ) ) { + $this->mFilteredExifData[$prop] = ord( $this->mFilteredExifData[$prop] ); + } + } + /** + * Convert gps in exif form to a single floating point number + * for example 10 degress 20`40`` S -> -10.34444 + * @param String $prop a gps coordinate exif tag name (like GPSLongitude) + */ + private function exifGPStoNumber ( $prop ) { + $loc =& $this->mFilteredExifData[$prop]; + $dir =& $this->mFilteredExifData[$prop . 'Ref']; + $res = false; + + if ( isset( $loc ) && isset( $dir ) && ( $dir === 'N' || $dir === 'S' || $dir === 'E' || $dir === 'W' ) ) { + list( $num, $denom ) = explode( '/', $loc[0] ); + $res = $num / $denom; + list( $num, $denom ) = explode( '/', $loc[1] ); + $res += ( $num / $denom ) * ( 1 / 60 ); + list( $num, $denom ) = explode( '/', $loc[2] ); + $res += ( $num / $denom ) * ( 1 / 3600 ); + + if ( $dir === 'S' || $dir === 'W' ) { + $res *= - 1; // make negative + } + } + + // update the exif records. + + if ( $res !== false ) { // using !== as $res could potentially be 0 + $this->mFilteredExifData[$prop] = $res; + unset( $this->mFilteredExifData[$prop . 'Ref'] ); + } else { // if invalid + unset( $this->mFilteredExifData[$prop] ); + unset( $this->mFilteredExifData[$prop . 
'Ref'] ); + } + } + + /** + * Use FormatMetadata to create formatted values for display to user + * (is this ever used?) + * + * @deprecated since 1.18 + */ + function makeFormattedData( ) { + wfDeprecated( __METHOD__ ); + $this->mFormattedExifData = FormatMetadata::getFormattedData( + $this->mFilteredExifData ); + } + /**#@-*/ + + /**#@+ + * @return array + */ + /** + * Get $this->mRawExifData + */ + function getData() { + return $this->mRawExifData; + } + + /** + * Get $this->mFilteredExifData + */ + function getFilteredData() { + return $this->mFilteredExifData; + } + + /** + * Get $this->mFormattedExifData + * + * This returns the data for display to user. + * Its unclear if this is ever used. + * + * @deprecated since 1.18 + */ + function getFormattedData() { + wfDeprecated( __METHOD__ ); + if (!$this->mFormattedExifData) { + $this->makeFormattedData(); + } + return $this->mFormattedExifData; + } + /**#@-*/ + + /** + * The version of the output format + * + * Before the actual metadata information is saved in the database we + * strip some of it since we don't want to save things like thumbnails + * which usually accompany Exif data. This value gets saved in the + * database along with the actual Exif data, and if the version in the + * database doesn't equal the value returned by this function the Exif + * data is regenerated. + * + * @return int + */ + public static function version() { + return 2; // We don't need no bloddy constants! 
+ } + + /**#@+ + * Validates if a tag value is of the type it should be according to the Exif spec + * + * @private + * + * @param $in Mixed: the input value to check + * @return bool + */ + private function isByte( $in ) { + if ( !is_array( $in ) && sprintf('%d', $in) == $in && $in >= 0 && $in <= 255 ) { + $this->debug( $in, __FUNCTION__, true ); + return true; + } else { + $this->debug( $in, __FUNCTION__, false ); + return false; + } + } + + /** + * @param $in + * @return bool + */ + private function isASCII( $in ) { + if ( is_array( $in ) ) { + return false; + } + + if ( preg_match( "/[^\x0a\x20-\x7e]/", $in ) ) { + $this->debug( $in, __FUNCTION__, 'found a character not in our whitelist' ); + return false; + } + + if ( preg_match( '/^\s*$/', $in ) ) { + $this->debug( $in, __FUNCTION__, 'input consisted solely of whitespace' ); + return false; + } + + return true; + } + + /** + * Accepts a non-array integer value in the unsigned 16-bit range, 0 to 65535. + * + * @param $in + * @return bool + */ + private function isShort( $in ) { + if ( !is_array( $in ) && sprintf('%d', $in) == $in && $in >= 0 && $in <= 65535 ) { + $this->debug( $in, __FUNCTION__, true ); + return true; + } else { + $this->debug( $in, __FUNCTION__, false ); + return false; + } + } + + /** + * Accepts a non-array integer value in the unsigned 32-bit range, 0 to 4294967295. + * + * @param $in + * @return bool + */ + private function isLong( $in ) { + if ( !is_array( $in ) && sprintf('%d', $in) == $in && $in >= 0 && $in <= 4294967295 ) { + $this->debug( $in, __FUNCTION__, true ); + return true; + } else { + $this->debug( $in, __FUNCTION__, false ); + return false; + } + } + + /** + * @param $in + * @return bool + */ + private function isRational( $in ) { + $m = array(); + if ( !is_array( $in ) && @preg_match( '/^(\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m ) ) { # Avoid division by zero + return $this->isLong( $m[1] ) && $this->isLong( $m[2] ); + } else { + $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' ); + return false; + } + } + + /** + * @param $in + * @return bool + */ + private function isUndefined( $in ) { + $this->debug( $in, __FUNCTION__, true ); + 
return true; + } + + /** + * @param $in + * @return bool + */ + private function isSlong( $in ) { + if ( $this->isLong( abs( $in ) ) ) { + $this->debug( $in, __FUNCTION__, true ); + return true; + } else { + $this->debug( $in, __FUNCTION__, false ); + return false; + } + } + + /** + * Accepts a signed fraction written as "numerator/denominator" with a non-zero + * denominator; the numerator and the denominator must each pass isSlong(). + * + * @param $in + * @return bool + */ + private function isSrational( $in ) { + $m = array(); + if ( !is_array( $in ) && preg_match( '/^(-?\d+)\/(\d+[1-9]|[1-9]\d*)$/', $in, $m ) ) { # Avoid division by zero + // $m[0] is the whole match; the numerator and denominator are groups 1 and 2. + return $this->isSlong( $m[1] ) && $this->isSlong( $m[2] ); + } else { + $this->debug( $in, __FUNCTION__, 'fed a non-fraction value' ); + return false; + } + } + /**#@-*/ + + /** + * Validates if a tag has a legal value according to the Exif spec + * + * @private + * @param $section String: section where tag is located. + * @param $tag String: the tag to check. + * @param $val Mixed: the value of the tag. + * @param $recursive Boolean: true if called recursively for array types. + * @return bool + */ + private function validate( $section, $tag, $val, $recursive = false ) { + $debug = "tag is '$tag'"; + $etype = $this->mExifTags[$section][$tag]; + $ecount = 1; + if( is_array( $etype ) ) { + list( $etype, $ecount ) = $etype; + if ( $recursive ) + $ecount = 1; // checking individual elements + } + $count = count( $val ); + if( $ecount != $count ) { + $this->debug( $val, __FUNCTION__, "Expected $ecount elements for $tag but got $count" ); + return false; + } + if( $count > 1 ) { + foreach( $val as $v ) { + if( !$this->validate( $section, $tag, $v, true ) ) { + return false; + } + } + return true; + } + // Does not work if not typecast + switch( (string)$etype ) { + case (string)Exif::BYTE: + $this->debug( $val, __FUNCTION__, $debug ); + return $this->isByte( $val ); + case (string)Exif::ASCII: + $this->debug( $val, __FUNCTION__, $debug ); + return $this->isASCII( $val ); + case (string)Exif::SHORT: + $this->debug( $val, __FUNCTION__, $debug ); + return $this->isShort( $val ); + case 
(string)Exif::LONG: + $this->debug( $val, __FUNCTION__, $debug ); + return $this->isLong( $val ); + case (string)Exif::RATIONAL: + $this->debug( $val, __FUNCTION__, $debug ); + return $this->isRational( $val ); + case (string)Exif::UNDEFINED: + $this->debug( $val, __FUNCTION__, $debug ); + return $this->isUndefined( $val ); + case (string)Exif::SLONG: + $this->debug( $val, __FUNCTION__, $debug ); + return $this->isSlong( $val ); + case (string)Exif::SRATIONAL: + $this->debug( $val, __FUNCTION__, $debug ); + return $this->isSrational( $val ); + case (string)Exif::SHORT.','.Exif::LONG: + $this->debug( $val, __FUNCTION__, $debug ); + return $this->isShort( $val ) || $this->isLong( $val ); + case (string)Exif::IGNORE: + $this->debug( $val, __FUNCTION__, $debug ); + return false; + default: + $this->debug( $val, __FUNCTION__, "The tag '$tag' is unknown" ); + return false; + } + } + + /** + * Convenience function for debugging output + * + * @private + * + * @param $in Mixed: + * @param $fname String: + * @param $action Mixed: , default NULL. 
+ */ + private function debug( $in, $fname, $action = null ) { + if ( !$this->log ) { + return; + } + $type = gettype( $in ); + $class = ucfirst( __CLASS__ ); + if ( $type === 'array' ) { + $in = print_r( $in, true ); + } + + if ( $action === true ) { + wfDebugLog( $this->log, "$class::$fname: accepted: '$in' (type: $type)\n"); + } elseif ( $action === false ) { + wfDebugLog( $this->log, "$class::$fname: rejected: '$in' (type: $type)\n"); + } elseif ( $action === null ) { + wfDebugLog( $this->log, "$class::$fname: input was: '$in' (type: $type)\n"); + } else { + wfDebugLog( $this->log, "$class::$fname: $action (type: $type; content: '$in')\n"); + } + } + + /** + * Convenience function for debugging output + * + * @private + * + * @param $fname String: the name of the function calling this function + * @param $io Boolean: Specify whether we're beginning or ending + */ + private function debugFile( $fname, $io ) { + if ( !$this->log ) { + return; + } + $class = ucfirst( __CLASS__ ); + if ( $io ) { + wfDebugLog( $this->log, "$class::$fname: begin processing: '{$this->basename}'\n" ); + } else { + wfDebugLog( $this->log, "$class::$fname: end processing: '{$this->basename}'\n" ); + } + } +} + diff --git a/includes/media/ExifBitmap.php b/includes/media/ExifBitmap.php new file mode 100644 index 00000000..05ce161b --- /dev/null +++ b/includes/media/ExifBitmap.php @@ -0,0 +1,210 @@ +<?php +/** + * @file + * @ingroup Media + */ + +/** + * Stuff specific to JPEG and (built-in) TIFF handler. + * All metadata related, since both JPEG and TIFF support Exif. + * + * @ingroup Media + */ +class ExifBitmapHandler extends BitmapHandler { + + const BROKEN_FILE = '-1'; // error extracting metadata + const OLD_BROKEN_FILE = '0'; // outdated error extracting metadata. + + function convertMetadataVersion( $metadata, $version = 1 ) { + // basically flattens arrays. 
+ $version = explode(';', $version, 2); + $version = intval($version[0]); + if ( $version < 1 || $version >= 2 ) { + return $metadata; + } + + $avoidHtml = true; + + if ( !is_array( $metadata ) ) { + $metadata = unserialize( $metadata ); + } + if ( !isset( $metadata['MEDIAWIKI_EXIF_VERSION'] ) || $metadata['MEDIAWIKI_EXIF_VERSION'] != 2 ) { + return $metadata; + } + + // Treat Software as a special case because in can contain + // an array of (SoftwareName, Version). + if (isset( $metadata['Software'] ) + && is_array( $metadata['Software'] ) + && is_array( $metadata['Software'][0]) + && isset( $metadata['Software'][0][0] ) + && isset( $metadata['Software'][0][1]) + ) { + $metadata['Software'] = $metadata['Software'][0][0] . ' (Version ' + . $metadata['Software'][0][1] . ')'; + } + + // ContactInfo also has to be dealt with specially + if ( isset( $metadata['Contact'] ) ) { + $metadata['Contact'] = + FormatMetadata::collapseContactInfo( + $metadata['Contact'] ); + } + + foreach ( $metadata as &$val ) { + if ( is_array( $val ) ) { + $val = FormatMetadata::flattenArray( $val, 'ul', $avoidHtml ); + } + } + $metadata['MEDIAWIKI_EXIF_VERSION'] = 1; + return $metadata; + } + + function isMetadataValid( $image, $metadata ) { + global $wgShowEXIF; + if ( !$wgShowEXIF ) { + # Metadata disabled and so an empty field is expected + return self::METADATA_GOOD; + } + if ( $metadata === self::OLD_BROKEN_FILE ) { + # Old special value indicating that there is no EXIF data in the file. + # or that there was an error well extracting the metadata. + wfDebug( __METHOD__ . 
": back-compat version\n"); + return self::METADATA_COMPATIBLE; + } + if ( $metadata === self::BROKEN_FILE ) { + return self::METADATA_GOOD; + } + wfSuppressWarnings(); + $exif = unserialize( $metadata ); + wfRestoreWarnings(); + if ( !isset( $exif['MEDIAWIKI_EXIF_VERSION'] ) || + $exif['MEDIAWIKI_EXIF_VERSION'] != Exif::version() ) + { + if ( isset( $exif['MEDIAWIKI_EXIF_VERSION'] ) && + $exif['MEDIAWIKI_EXIF_VERSION'] == 1 ) + { + //back-compatible but old + wfDebug( __METHOD__.": back-compat version\n" ); + return self::METADATA_COMPATIBLE; + } + # Wrong (non-compatible) version + wfDebug( __METHOD__.": wrong version\n" ); + return self::METADATA_BAD; + } + return self::METADATA_GOOD; + } + + /** + * @param $image File + * @return array|bool + */ + function formatMetadata( $image ) { + $metadata = $image->getMetadata(); + if ( $metadata === self::OLD_BROKEN_FILE || + $metadata === self::BROKEN_FILE || + $this->isMetadataValid( $image, $metadata ) === self::METADATA_BAD ) + { + // So we don't try and display metadata from PagedTiffHandler + // for example when using InstantCommons. + return false; + } + + $exif = unserialize( $metadata ); + if ( !$exif ) { + return false; + } + unset( $exif['MEDIAWIKI_EXIF_VERSION'] ); + if ( count( $exif ) == 0 ) { + return false; + } + return $this->formatMetadataHelper( $exif ); + } + + function getMetadataType( $image ) { + return 'exif'; + } + + /** + * Wrapper for base classes ImageHandler::getImageSize() that checks for + * rotation reported from metadata and swaps the sizes to match. + * + * @param File $image + * @param string $path + * @return array + */ + function getImageSize( $image, $path ) { + global $wgEnableAutoRotation; + $gis = parent::getImageSize( $image, $path ); + + // Don't just call $image->getMetadata(); File::getPropsFromPath() calls us with a bogus object. + // This may mean we read EXIF data twice on initial upload. 
+ if ( $wgEnableAutoRotation ) { + $meta = $this->getMetadata( $image, $path ); + $rotation = $this->getRotationForExif( $meta ); + } else { + $rotation = 0; + } + + if ($rotation == 90 || $rotation == 270) { + $width = $gis[0]; + $gis[0] = $gis[1]; + $gis[1] = $width; + } + return $gis; + } + + /** + * On supporting image formats, try to read out the low-level orientation + * of the file and return the angle that the file needs to be rotated to + * be viewed. + * + * This information is only useful when manipulating the original file; + * the width and height we normally work with is logical, and will match + * any produced output views. + * + * @param $file File + * @return int 0, 90, 180 or 270 + */ + public function getRotation( $file ) { + global $wgEnableAutoRotation; + if ( !$wgEnableAutoRotation ) { + return 0; + } + + $data = $file->getMetadata(); + return $this->getRotationForExif( $data ); + } + + /** + * Given a chunk of serialized Exif metadata, return the orientation as + * degrees of rotation. + * + * @param string $data + * @return int 0, 90, 180 or 270 + * @fixme orientation can include flipping as well; see if this is an issue! 
+ */ + protected function getRotationForExif( $data ) { + if ( !$data ) { + return 0; + } + wfSuppressWarnings(); + $data = unserialize( $data ); + wfRestoreWarnings(); + if ( isset( $data['Orientation'] ) ) { + # See http://sylvana.net/jpegcrop/exif_orientation.html + switch ( $data['Orientation'] ) { + case 8: + return 90; + case 3: + return 180; + case 6: + return 270; + default: + return 0; + } + } + return 0; + } +} + diff --git a/includes/media/FormatMetadata.php b/includes/media/FormatMetadata.php new file mode 100644 index 00000000..47fc1adc --- /dev/null +++ b/includes/media/FormatMetadata.php @@ -0,0 +1,1354 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @ingroup Media + * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> + * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason, 2009 Brent Garber, 2010 Brian Wolff + * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License + * @see http://exif.org/Exif2-2.PDF The Exif 2.2 specification + * @file + */ + + +/** + * Format Image metadata values into a human readable form. + * + * Note lots of these messages use the prefix 'exif' even though + * they may not be exif properties. 
For example 'exif-ImageDescription' + * can be the Exif ImageDescription, or it could be the iptc-iim caption + * property, or it could be the xmp dc:description property. This + * is because these messages should be independent of how the data is + * stored, sine the user doesn't care if the description is stored in xmp, + * exif, etc only that its a description. (Additionally many of these properties + * are merged together following the MWG standard, such that for example, + * exif properties override XMP properties that mean the same thing if + * there is a conflict). + * + * It should perhaps use a prefix like 'metadata' instead, but there + * is already a large number of messages using the 'exif' prefix. + * + * @ingroup Media + */ +class FormatMetadata { + + /** + * Numbers given by Exif user agents are often magical, that is they + * should be replaced by a detailed explanation depending on their + * value which most of the time are plain integers. This function + * formats Exif (and other metadata) values into human readable form. + * + * @param $tags Array: the Exif data to format ( as returned by + * Exif::getFilteredData() or BitmapMetadataHandler ) + * @return array + */ + public static function getFormattedData( $tags ) { + global $wgLang; + + $resolutionunit = !isset( $tags['ResolutionUnit'] ) || $tags['ResolutionUnit'] == 2 ? 2 : 3; + unset( $tags['ResolutionUnit'] ); + + foreach ( $tags as $tag => &$vals ) { + + // This seems ugly to wrap non-array's in an array just to unwrap again, + // especially when most of the time it is not an array + if ( !is_array( $tags[$tag] ) ) { + $vals = Array( $vals ); + } + + // _type is a special value to say what array type + if ( isset( $tags[$tag]['_type'] ) ) { + $type = $tags[$tag]['_type']; + unset( $vals['_type'] ); + } else { + $type = 'ul'; // default unordered list. + } + + //This is done differently as the tag is an array. 
+ if ($tag == 'GPSTimeStamp' && count($vals) === 3) { + //hour min sec array + + $h = explode('/', $vals[0]); + $m = explode('/', $vals[1]); + $s = explode('/', $vals[2]); + + // this should already be validated + // when loaded from file, but it could + // come from a foreign repo, so be + // paranoid. + if ( !isset($h[1]) + || !isset($m[1]) + || !isset($s[1]) + || $h[1] == 0 + || $m[1] == 0 + || $s[1] == 0 + ) { + continue; + } + $tags[$tag] = intval( $h[0] / $h[1] ) + . ':' . str_pad( intval( $m[0] / $m[1] ), 2, '0', STR_PAD_LEFT ) + . ':' . str_pad( intval( $s[0] / $s[1] ), 2, '0', STR_PAD_LEFT ); + + $time = wfTimestamp( TS_MW, '1971:01:01 ' . $tags[$tag] ); + // the 1971:01:01 is just a placeholder, and not shown to user. + if ( $time && intval( $time ) > 0 ) { + $tags[$tag] = $wgLang->time( $time ); + } + continue; + } + + // The contact info is a multi-valued field + // instead of the other props which are single + // valued (mostly) so handle as a special case. + if ( $tag === 'Contact' ) { + $vals = self::collapseContactInfo( $vals ); + continue; + } + + foreach ( $vals as &$val ) { + + switch( $tag ) { + case 'Compression': + switch( $val ) { + case 1: case 2: case 3: case 4: + case 5: case 6: case 7: case 8: + case 32773: case 32946: case 34712: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'PhotometricInterpretation': + switch( $val ) { + case 2: case 6: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'Orientation': + switch( $val ) { + case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'PlanarConfiguration': + switch( $val ) { + case 1: case 2: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. 
*/ + break; + } + break; + + // TODO: YCbCrSubSampling + case 'YCbCrPositioning': + switch ( $val ) { + case 1: + case 2: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'XResolution': + case 'YResolution': + switch( $resolutionunit ) { + case 2: + $val = self::msg( 'XYResolution', 'i', self::formatNum( $val ) ); + break; + case 3: + $val = self::msg( 'XYResolution', 'c', self::formatNum( $val ) ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + // TODO: YCbCrCoefficients #p27 (see annex E) + case 'ExifVersion': case 'FlashpixVersion': + $val = "$val" / 100; + break; + + case 'ColorSpace': + switch( $val ) { + case 1: case 65535: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'ComponentsConfiguration': + switch( $val ) { + case 0: case 1: case 2: case 3: case 4: case 5: case 6: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'DateTime': + case 'DateTimeOriginal': + case 'DateTimeDigitized': + case 'DateTimeReleased': + case 'DateTimeExpires': + case 'GPSDateStamp': + case 'dc-date': + case 'DateTimeMetadata': + if ( $val == '0000:00:00 00:00:00' || $val == ' : : : : ' ) { + $val = wfMsg( 'exif-unknowndate' ); + } elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d) (?:\d\d):(?:\d\d):(?:\d\d)$/D', $val ) ) { + $time = wfTimestamp( TS_MW, $val ); + if ( $time && intval( $time ) > 0 ) { + $val = $wgLang->timeanddate( $time ); + } + } elseif ( preg_match( '/^(?:\d{4}):(?:\d\d):(?:\d\d)$/D', $val ) ) { + // If only the date but not the time is filled in. + $time = wfTimestamp( TS_MW, substr( $val, 0, 4 ) + . substr( $val, 5, 2 ) + . substr( $val, 8, 2 ) + . 
'000000' );
					if ( $time && intval( $time ) > 0 ) {
						$val = $wgLang->date( $time );
					}
				}
				// else it will just output $val without formatting it.
				break;

			case 'ExposureProgram':
				switch( $val ) {
				case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:
					$val = self::msg( $tag, $val );
					break;
				default:
					/* If not recognized, display as is. */
					break;
				}
				break;

			case 'SubjectDistance':
				$val = self::msg( $tag, '', self::formatNum( $val ) );
				break;

			case 'MeteringMode':
				switch( $val ) {
				case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 255:
					$val = self::msg( $tag, $val );
					break;
				default:
					/* If not recognized, display as is. */
					break;
				}
				break;

			case 'LightSource':
				switch( $val ) {
				case 0: case 1: case 2: case 3: case 4: case 9: case 10: case 11:
				case 12: case 13: case 14: case 15: case 17: case 18: case 19: case 20:
				case 21: case 22: case 23: case 24: case 255:
					$val = self::msg( $tag, $val );
					break;
				default:
					/* If not recognized, display as is. */
					break;
				}
				break;

			case 'Flash':
				// Split the bit field into its sub-fields: bit 0, bits 1-2,
				// bits 3-4, bit 5 and bit 6 (bit 7 is left undecoded).
				$flashDecode = array(
					'fired'    => $val & bindec( '00000001' ),
					'return'   => ( $val & bindec( '00000110' ) ) >> 1,
					'mode'     => ( $val & bindec( '00011000' ) ) >> 3,
					'function' => ( $val & bindec( '00100000' ) ) >> 5,
					'redeye'   => ( $val & bindec( '01000000' ) ) >> 6,
//					'reserved' => ($val & bindec( '10000000' )) >> 7,
				);

				// Start from an empty list for every value. Without this
				// reset the list keeps growing across iterations of the
				// enclosing loops, so a second Flash value would also
				// show the messages collected for the first one.
				$flashMsgs = array();

				# We do not need to handle unknown values since all are used.
				foreach ( $flashDecode as $subTag => $subValue ) {
					# We do not need any message for zeroed values.
					if ( $subTag != 'fired' && $subValue == 0 ) {
						continue;
					}
					$fullTag = $tag . '-' . $subTag ;
					$flashMsgs[] = self::msg( $fullTag, $subValue );
				}
				$val = $wgLang->commaList( $flashMsgs );
				break;

			case 'FocalPlaneResolutionUnit':
				switch( $val ) {
				case 2:
					$val = self::msg( $tag, $val );
					break;
				default:
					/* If not recognized, display as is.
*/ + break; + } + break; + + case 'SensingMethod': + switch( $val ) { + case 1: case 2: case 3: case 4: case 5: case 7: case 8: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'FileSource': + switch( $val ) { + case 3: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'SceneType': + switch( $val ) { + case 1: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'CustomRendered': + switch( $val ) { + case 0: case 1: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'ExposureMode': + switch( $val ) { + case 0: case 1: case 2: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'WhiteBalance': + switch( $val ) { + case 0: case 1: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'SceneCaptureType': + switch( $val ) { + case 0: case 1: case 2: case 3: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'GainControl': + switch( $val ) { + case 0: case 1: case 2: case 3: case 4: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'Contrast': + switch( $val ) { + case 0: case 1: case 2: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'Saturation': + switch( $val ) { + case 0: case 1: case 2: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. 
*/ + break; + } + break; + + case 'Sharpness': + switch( $val ) { + case 0: case 1: case 2: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'SubjectDistanceRange': + switch( $val ) { + case 0: case 1: case 2: case 3: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + //The GPS...Ref values are kept for compatibility, probably won't be reached. + case 'GPSLatitudeRef': + case 'GPSDestLatitudeRef': + switch( $val ) { + case 'N': case 'S': + $val = self::msg( 'GPSLatitude', $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'GPSLongitudeRef': + case 'GPSDestLongitudeRef': + switch( $val ) { + case 'E': case 'W': + $val = self::msg( 'GPSLongitude', $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'GPSAltitude': + if ( $val < 0 ) { + $val = self::msg( 'GPSAltitude', 'below-sealevel', self::formatNum( -$val, 3 ) ); + } else { + $val = self::msg( 'GPSAltitude', 'above-sealevel', self::formatNum( $val, 3 ) ); + } + break; + + case 'GPSStatus': + switch( $val ) { + case 'A': case 'V': + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'GPSMeasureMode': + switch( $val ) { + case 2: case 3: + $val = self::msg( $tag, $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + + case 'GPSTrackRef': + case 'GPSImgDirectionRef': + case 'GPSDestBearingRef': + switch( $val ) { + case 'T': case 'M': + $val = self::msg( 'GPSDirection', $val ); + break; + default: + /* If not recognized, display as is. 
*/ + break; + } + break; + + case 'GPSLatitude': + case 'GPSDestLatitude': + $val = self::formatCoords( $val, 'latitude' ); + break; + case 'GPSLongitude': + case 'GPSDestLongitude': + $val = self::formatCoords( $val, 'longitude' ); + break; + + case 'GPSSpeedRef': + switch( $val ) { + case 'K': case 'M': case 'N': + $val = self::msg( 'GPSSpeed', $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'GPSDestDistanceRef': + switch( $val ) { + case 'K': case 'M': case 'N': + $val = self::msg( 'GPSDestDistance', $val ); + break; + default: + /* If not recognized, display as is. */ + break; + } + break; + + case 'GPSDOP': + // See http://en.wikipedia.org/wiki/Dilution_of_precision_(GPS) + if ( $val <= 2 ) { + $val = self::msg( $tag, 'excellent', self::formatNum( $val ) ); + } elseif ( $val <= 5 ) { + $val = self::msg( $tag, 'good', self::formatNum( $val ) ); + } elseif ( $val <= 10 ) { + $val = self::msg( $tag, 'moderate', self::formatNum( $val ) ); + } elseif ( $val <= 20 ) { + $val = self::msg( $tag, 'fair', self::formatNum( $val ) ); + } else { + $val = self::msg( $tag, 'poor', self::formatNum( $val ) ); + } + break; + + // This is not in the Exif standard, just a special + // case for our purposes which enables wikis to wikify + // the make, model and software name to link to their articles. + case 'Make': + case 'Model': + $val = self::msg( $tag, '', $val ); + break; + + case 'Software': + if ( is_array( $val ) ) { + //if its a software, version array. + $val = wfMsg( 'exif-software-version-value', $val[0], $val[1] ); + } else { + $val = self::msg( $tag, '', $val ); + } + break; + + case 'ExposureTime': + // Show the pretty fraction as well as decimal version + $val = wfMsg( 'exif-exposuretime-format', + self::formatFraction( $val ), self::formatNum( $val ) ); + break; + case 'ISOSpeedRatings': + // If its = 65535 that means its at the + // limit of the size of Exif::short and + // is really higher. 
+ if ( $val == '65535' ) { + $val = self::msg( $tag, 'overflow' ); + } else { + $val = self::formatNum( $val ); + } + break; + case 'FNumber': + $val = wfMsg( 'exif-fnumber-format', + self::formatNum( $val ) ); + break; + + case 'FocalLength': case 'FocalLengthIn35mmFilm': + $val = wfMsg( 'exif-focallength-format', + self::formatNum( $val ) ); + break; + + case 'MaxApertureValue': + if ( strpos( $val, '/' ) !== false ) { + // need to expand this earlier to calculate fNumber + list($n, $d) = explode('/', $val); + if ( is_numeric( $n ) && is_numeric( $d ) ) { + $val = $n / $d; + } + } + if ( is_numeric( $val ) ) { + $fNumber = pow( 2, $val / 2 ); + if ( $fNumber !== false ) { + $val = wfMsg( 'exif-maxaperturevalue-value', + self::formatNum( $val ), + self::formatNum( $fNumber, 2 ) + ); + } + } + break; + + case 'iimCategory': + switch( strtolower( $val ) ) { + // See pg 29 of IPTC photo + // metadata standard. + case 'ace': case 'clj': + case 'dis': case 'fin': + case 'edu': case 'evn': + case 'hth': case 'hum': + case 'lab': case 'lif': + case 'pol': case 'rel': + case 'sci': case 'soi': + case 'spo': case 'war': + case 'wea': + $val = self::msg( + 'iimcategory', + $val + ); + } + break; + case 'SubjectNewsCode': + // Essentially like iimCategory. + // 8 (numeric) digit hierarchical + // classification. We decode the + // first 2 digits, which provide + // a broad category. + $val = self::convertNewsCode( $val ); + break; + case 'Urgency': + // 1-8 with 1 being highest, 5 normal + // 0 is reserved, and 9 is 'user-defined'. + $urgency = ''; + if ( $val == 0 || $val == 9 ) { + $urgency = 'other'; + } elseif ( $val < 5 && $val > 1 ) { + $urgency = 'high'; + } elseif ( $val == 5 ) { + $urgency = 'normal'; + } elseif ( $val <= 8 && $val > 5) { + $urgency = 'low'; + } + + if ( $urgency !== '' ) { + $val = self::msg( 'urgency', + $urgency, $val + ); + } + break; + + // Things that have a unit of pixels. 
+ case 'OriginalImageHeight': + case 'OriginalImageWidth': + case 'PixelXDimension': + case 'PixelYDimension': + case 'ImageWidth': + case 'ImageLength': + $val = self::formatNum( $val ) . ' ' . wfMsg( 'unit-pixel' ); + break; + + // Do not transform fields with pure text. + // For some languages the formatNum() + // conversion results to wrong output like + // foo,bar@example,com or foo٫bar@example٫com. + // Also some 'numeric' things like Scene codes + // are included here as we really don't want + // commas inserted. + case 'ImageDescription': + case 'Artist': + case 'Copyright': + case 'RelatedSoundFile': + case 'ImageUniqueID': + case 'SpectralSensitivity': + case 'GPSSatellites': + case 'GPSVersionID': + case 'GPSMapDatum': + case 'Keywords': + case 'WorldRegionDest': + case 'CountryDest': + case 'CountryCodeDest': + case 'ProvinceOrStateDest': + case 'CityDest': + case 'SublocationDest': + case 'WorldRegionCreated': + case 'CountryCreated': + case 'CountryCodeCreated': + case 'ProvinceOrStateCreated': + case 'CityCreated': + case 'SublocationCreated': + case 'ObjectName': + case 'SpecialInstructions': + case 'Headline': + case 'Credit': + case 'Source': + case 'EditStatus': + case 'FixtureIdentifier': + case 'LocationDest': + case 'LocationDestCode': + case 'Writer': + case 'JPEGFileComment': + case 'iimSupplementalCategory': + case 'OriginalTransmissionRef': + case 'Identifier': + case 'dc-contributor': + case 'dc-coverage': + case 'dc-publisher': + case 'dc-relation': + case 'dc-rights': + case 'dc-source': + case 'dc-type': + case 'Lens': + case 'SerialNumber': + case 'CameraOwnerName': + case 'Label': + case 'Nickname': + case 'RightsCertificate': + case 'CopyrightOwner': + case 'UsageTerms': + case 'WebStatement': + case 'OriginalDocumentID': + case 'LicenseUrl': + case 'MorePermissionsUrl': + case 'AttributionUrl': + case 'PreferredAttributionName': + case 'PNGFileComment': + case 'Disclaimer': + case 'ContentWarning': + case 'GIFFileComment': + case 
'SceneCode': + case 'IntellectualGenre': + case 'Event': + case 'OrginisationInImage': + case 'PersonInImage': + + $val = htmlspecialchars( $val ); + break; + + case 'ObjectCycle': + switch ( $val ) { + case 'a': case 'p': case 'b': + $val = self::msg( $tag, $val ); + break; + default: + $val = htmlspecialchars( $val ); + break; + } + break; + case 'Copyrighted': + switch( $val ) { + case 'True': case 'False': + $val = self::msg( $tag, $val ); + break; + } + break; + case 'Rating': + if ( $val == '-1' ) { + $val = self::msg( $tag, 'rejected' ); + } else { + $val = self::formatNum( $val ); + } + break; + + case 'LanguageCode': + $lang = $wgLang->getLanguageName( strtolower( $val ) ); + if ($lang) { + $val = htmlspecialchars( $lang ); + } else { + $val = htmlspecialchars( $val ); + } + break; + + default: + $val = self::formatNum( $val ); + break; + } + } + // End formatting values, start flattening arrays. + $vals = self::flattenArray( $vals, $type ); + + } + return $tags; + } + + /** + * A function to collapse multivalued tags into a single value. + * This turns an array of (for example) authors into a bulleted list. + * + * This is public on the basis it might be useful outside of this class. + * + * @param $vals Array array of values + * @param $type String Type of array (either lang, ul, ol). + * lang = language assoc array with keys being the lang code + * ul = unordered list, ol = ordered list + * type can also come from the '_type' member of $vals. + * @param $noHtml Boolean If to avoid returning anything resembling + * html. (Ugly hack for backwards compatibility with old mediawiki). + * @return String single value (in wiki-syntax). 
*/
public static function flattenArray( $vals, $type = 'ul', $noHtml = false ) {
	// Scalars pass straight through. This guard has to come before any
	// array-style access: on PHP older than 5.4 isset( $string['_type'] )
	// is true for a non-empty string (the offset is coerced to 0) and the
	// unset() below would then abort with "Cannot unset string offsets".
	if ( !is_array( $vals ) ) {
		return $vals; // do nothing if not an array;
	}

	// An embedded '_type' member overrides the $type argument and is not
	// itself part of the values.
	if ( isset( $vals['_type'] ) ) {
		$type = $vals['_type'];
		unset( $vals['_type'] );
	}

	if ( count( $vals ) === 1 && $type !== 'lang' ) {
		return $vals[0];
	}
	elseif ( count( $vals ) === 0 ) {
		wfDebug( __METHOD__ . ' metadata array with 0 elements!' );
		return ""; // paranoia. This should never happen
	}
	/* @todo FIXME: This should hide some of the list entries if there are
	 * say more than four. Especially if a field is translated into 20
	 * languages, we don't want to show them all by default
	 */
	else {
		global $wgContLang;
		switch( $type ) {
		case 'lang':
			// Display default, followed by ContLang,
			// followed by the rest in no particular
			// order.

			// Todo: hide some items if really long list.

			$content = '';

			$cLang = $wgContLang->getCode();
			$defaultItem = false;
			$defaultLang = false;

			// If default is set, save it for later,
			// as we don't know if it's equal to
			// one of the lang codes. (In xmp
			// you specify the language for a
			// default property by having both
			// a default prop, and one in the language
			// that are identical)
			if ( isset( $vals['x-default'] ) ) {
				$defaultItem = $vals['x-default'];
				unset( $vals['x-default'] );
			}
			// Do contentLanguage.
			if ( isset( $vals[$cLang] ) ) {
				$isDefault = false;
				if ( $vals[$cLang] === $defaultItem ) {
					$defaultItem = false;
					$isDefault = true;
				}
				$content .= self::langItem(
					$vals[$cLang], $cLang,
					$isDefault, $noHtml );

				unset( $vals[$cLang] );
			}

			// Now do the rest.
			foreach ( $vals as $lang => $item ) {
				if ( $item === $defaultItem ) {
					// Same text as the default: remember its language
					// and emit it once, as the default entry below.
					$defaultLang = $lang;
					continue;
				}
				$content .= self::langItem( $item,
					$lang, false, $noHtml );
			}
			if ( $defaultItem !== false ) {
				$content = self::langItem( $defaultItem,
					$defaultLang, true, $noHtml )
					. $content;
			}
			if ( $noHtml ) {
				return $content;
			}
			return '<ul class="metadata-langlist">' .
				$content .
				'</ul>';
		case 'ol':
			if ( $noHtml ) {
				return "\n#" . implode( "\n#", $vals );
			}
			return "<ol><li>" . implode( "</li>\n<li>", $vals ) . '</li></ol>';
		case 'ul':
		default:
			if ( $noHtml ) {
				return "\n*" . implode( "\n*", $vals );
			}
			return "<ul><li>" . implode( "</li>\n<li>", $vals ) . '</li></ul>';
		}
	}
}

/** Helper function for creating lists of translations.
 *
 * @param $value String value (this is not escaped)
 * @param $lang String lang code of item or false
 * @param $default Boolean if it is default value.
 * @param $noHtml Boolean If to avoid html (for back-compat)
 * @return language item (Note: despite how this looks,
 *    this is treated as wikitext not html).
 */
private static function langItem( $value, $lang, $default = false, $noHtml = false ) {
	global $wgContLang;
	if ( $lang === false && $default === false) {
		throw new MWException('$lang and $default cannot both '
			. 'be false.');
	}

	if ( $noHtml ) {
		$wrappedValue = $value;
	} else {
		$wrappedValue = '<span class="mw-metadata-lang-value">'
			. $value . '</span>';
	}

	// No language code: this is a default item whose language is unknown.
	if ( $lang === false ) {
		if ( $noHtml ) {
			return wfMsg( 'metadata-langitem-default',
				$wrappedValue ) . "\n\n";
		} /* else */
		return '<li class="mw-metadata-lang-default">'
			. wfMsg( 'metadata-langitem-default',
				$wrappedValue )
			. "</li>\n";
	}

	$lowLang = strtolower( $lang );
	$langName = $wgContLang->getLanguageName( $lowLang );
	if ( $langName === '' ) {
		//try just the base language name. (aka en-US -> en ).
		list( $langPrefix ) = explode( '-', $lowLang, 2 );
		$langName = $wgContLang->getLanguageName( $langPrefix );
		if ( $langName === '' ) {
			// give up.
			$langName = $lang;
		}
	}
	// else we have a language specified

	if ( $noHtml ) {
		return '*' . wfMsg( 'metadata-langitem',
			$wrappedValue, $langName, $lang );
	} /* else: */

	$item = '<li class="mw-metadata-lang-code-'
		. $lang;
	if ( $default ) {
		$item .= ' mw-metadata-lang-default';
	}
	$item .= '" lang="' . $lang . '">';
	$item .= wfMsg( 'metadata-langitem',
		$wrappedValue, $langName, $lang );
	$item .= "</li>\n";
	return $item;
}

/**
 * Convenience function for getFormattedData()
 *
 * @private
 *
 * @param $tag String: the tag name to pass on
 * @param $val String: the value of the tag
 * @param $arg String: an argument to pass ($1)
 * @param $arg2 String: a 2nd argument to pass ($2)
 * @return string A wfMsg of "exif-$tag-$val" in lower case
 */
static function msg( $tag, $val, $arg = null, $arg2 = null ) {
	global $wgContLang;

	// An empty value selects the generic "exif-<tag>-value" message.
	if ( $val === '' ) {
		$val = 'value';
	}
	return wfMsg( $wgContLang->lc( "exif-$tag-$val" ), $arg, $arg2 );
}

/**
 * Format a number, convert numbers from fractions into floating point
 * numbers, joins arrays of numbers with commas.
 *
 * @private
 *
 * @param $num Mixed: the value to format
 * @param $round digits to round to or false.
* @return mixed Whatever $wgLang->formatNum() (or, for arrays,
 *    $wgLang->commaList()) gives back for the converted value
 */
static function formatNum( $num, $round = false ) {
	global $wgLang;

	// Arrays: format each member on its own, then join the pieces.
	if ( is_array( $num ) ) {
		$formatted = array();
		foreach ( $num as $element ) {
			$formatted[] = self::formatNum( $element );
		}
		return $wgLang->commaList( $formatted );
	}

	$parts = array();
	if ( !preg_match( '/^(-?\d+)\/(\d+)$/', $num, $parts ) ) {
		// Not written as "numerator/denominator".
		if ( $round !== false && is_numeric( $num ) ) {
			$num = round( $num, $round );
		}
		return $wgLang->formatNum( $num );
	}

	if ( $parts[2] == 0 ) {
		// Zero denominator: pass the fraction text through undivided.
		return $wgLang->formatNum( $num );
	}

	$quotient = $parts[1] / $parts[2];
	if ( $round !== false ) {
		$quotient = round( $quotient, $round );
	}
	return $wgLang->formatNum( $quotient );
}

/**
 * Reduce a "numerator/denominator" string to lowest terms and format
 * both halves with formatNum().
 *
 * @private
 *
 * @param $num Mixed: the value to format
 * @return mixed The reduced fraction joined with '/', or formatNum( $num )
 *    when $num is not a fraction or nothing can be divided out
 */
static function formatFraction( $num ) {
	$parts = array();
	if ( !preg_match( '/^(-?\d+)\/(\d+)$/', $num, $parts ) ) {
		return self::formatNum( $num );
	}

	$top = intval( $parts[1] );
	$bottom = intval( $parts[2] );
	$divisor = self::gcd( abs( $top ), $bottom );
	if ( $divisor == 0 ) {
		// Only reached for 0/0; fall back to the plain formatter.
		return self::formatNum( $num );
	}

	return self::formatNum( $top / $divisor ) . '/' . self::formatNum( $bottom / $divisor );
}

/**
 * Greatest common divisor of two integers, computed with the iterative
 * Euclidean algorithm (http://en.wikipedia.org/wiki/Euclidean_algorithm).
 *
 * @param $a Integer: Numerator
 * @param $b Integer: Denominator
 * @return int
 * @private
 */
static function gcd( $a, $b ) {
	// Each round replaces ( a, b ) by ( b, a mod b ) until b reaches zero.
	while ( $b != 0 ) {
		list( $a, $b ) = array( $b, $a % $b );
	}
	return $a;
}

/** Fetch the human readable version of a news code.
 * A news code is an 8 digit code. The first two
 * digits are a general classification, so we just
 * translate that.
*
 * Note, leading 0's are significant, so this is
 * a string, not an int.
 *
 * @param $val String: The 8 digit news code.
 * @return The 'subjectnewscode' message for a known category prefix,
 *    otherwise $val unchanged
 */
static private function convertNewsCode( $val ) {
	// Anything that is not exactly eight digits is handed back untouched.
	if ( !preg_match( '/^\d{8}$/D', $val ) ) {
		return $val;
	}

	// Two-digit prefix => category key used for the 'iimcategory' messages.
	$categories = array(
		'01' => 'ace',
		'02' => 'clj',
		'03' => 'dis',
		'04' => 'fin',
		'05' => 'edu',
		'06' => 'evn',
		'07' => 'hth',
		'08' => 'hum',
		'09' => 'lab',
		'10' => 'lif',
		'11' => 'pol',
		'12' => 'rel',
		'13' => 'sci',
		'14' => 'soi',
		'15' => 'spo',
		'16' => 'war',
		'17' => 'wea',
	);

	$prefix = substr( $val , 0, 2 );
	if ( !isset( $categories[$prefix] ) ) {
		// Unknown top-level category: show the raw code.
		return $val;
	}

	$catMsg = self::msg( 'iimcategory', $categories[$prefix] );
	return self::msg( 'subjectnewscode', '', $val, $catMsg );
}

/**
 * Format a coordinate value, convert numbers from floating point
 * into degree minute second representation.
+ * + * @param $coords Array: degrees, minutes and seconds + * @param $type String: latitude or longitude (for if its a NWS or E) + * @return mixed A floating point number or whatever we were fed + */ + static function formatCoords( $coord, $type ) { + $ref = ''; + if ( $coord < 0 ) { + $nCoord = -$coord; + if ( $type === 'latitude' ) { + $ref = 'S'; + } + elseif ( $type === 'longitude' ) { + $ref = 'W'; + } + } + else { + $nCoord = $coord; + if ( $type === 'latitude' ) { + $ref = 'N'; + } + elseif ( $type === 'longitude' ) { + $ref = 'E'; + } + } + + $deg = floor( $nCoord ); + $min = floor( ( $nCoord - $deg ) * 60.0 ); + $sec = round( ( ( $nCoord - $deg ) - $min / 60 ) * 3600, 2 ); + + $deg = self::formatNum( $deg ); + $min = self::formatNum( $min ); + $sec = self::formatNum( $sec ); + + return wfMsg( 'exif-coordinate-format', $deg, $min, $sec, $ref, $coord ); + } + + /** + * Format the contact info field into a single value. + * + * @param $vals Array array with fields of the ContactInfo + * struct defined in the IPTC4XMP spec. Or potentially + * an array with one element that is a free form text + * value from the older iptc iim 1:118 prop. + * + * This function might be called from + * JpegHandler::convertMetadataVersion which is why it is + * public. + * + * @return String of html-ish looking wikitext + */ + public static function collapseContactInfo( $vals ) { + if( ! ( isset( $vals['CiAdrExtadr'] ) + || isset( $vals['CiAdrCity'] ) + || isset( $vals['CiAdrCtry'] ) + || isset( $vals['CiEmailWork'] ) + || isset( $vals['CiTelWork'] ) + || isset( $vals['CiAdrPcode'] ) + || isset( $vals['CiAdrRegion'] ) + || isset( $vals['CiUrlWork'] ) + ) ) { + // We don't have any sub-properties + // This could happen if its using old + // iptc that just had this as a free-form + // text value. 
+ // Note: We run this through htmlspecialchars + // partially to be consistent, and partially + // because people often insert >, etc into + // the metadata which should not be interpreted + // but we still want to auto-link urls. + foreach( $vals as &$val ) { + $val = htmlspecialchars( $val ); + } + return self::flattenArray( $vals ); + } else { + // We have a real ContactInfo field. + // Its unclear if all these fields have to be + // set, so assume they do not. + $url = $tel = $street = $city = $country = ''; + $email = $postal = $region = ''; + + // Also note, some of the class names this uses + // are similar to those used by hCard. This is + // mostly because they're sensible names. This + // does not (and does not attempt to) output + // stuff in the hCard microformat. However it + // might output in the adr microformat. + + if ( isset( $vals['CiAdrExtadr'] ) ) { + // Todo: This can potentially be multi-line. + // Need to check how that works in XMP. + $street = '<span class="extended-address">' + . htmlspecialchars( + $vals['CiAdrExtadr'] ) + . '</span>'; + } + if ( isset( $vals['CiAdrCity'] ) ) { + $city = '<span class="locality">' + . htmlspecialchars( $vals['CiAdrCity'] ) + . '</span>'; + } + if ( isset( $vals['CiAdrCtry'] ) ) { + $country = '<span class="country-name">' + . htmlspecialchars( $vals['CiAdrCtry'] ) + . '</span>'; + } + if ( isset( $vals['CiEmailWork'] ) ) { + $emails = array(); + // Have to split multiple emails at commas/new lines. + $splitEmails = explode( "\n", $vals['CiEmailWork'] ); + foreach ( $splitEmails as $e1 ) { + // Also split on comma + foreach ( explode( ',', $e1 ) as $e2 ) { + $finalEmail = trim( $e2 ); + if ( $finalEmail == ',' || $finalEmail == '' ) { + continue; + } + if ( strpos( $finalEmail, '<' ) !== false ) { + // Don't do fancy formatting to + // "My name" <foo@bar.com> style stuff + $emails[] = $finalEmail; + } else { + $emails[] = '[mailto:' + . $finalEmail + . ' <span class="email">' + . $finalEmail + . 
'</span>]'; + } + } + } + $email = implode( ', ', $emails ); + } + if ( isset( $vals['CiTelWork'] ) ) { + $tel = '<span class="tel">' + . htmlspecialchars( $vals['CiTelWork'] ) + . '</span>'; + } + if ( isset( $vals['CiAdrPcode'] ) ) { + $postal = '<span class="postal-code">' + . htmlspecialchars( + $vals['CiAdrPcode'] ) + . '</span>'; + } + if ( isset( $vals['CiAdrRegion'] ) ) { + // Note this is province/state. + $region = '<span class="region">' + . htmlspecialchars( + $vals['CiAdrRegion'] ) + . '</span>'; + } + if ( isset( $vals['CiUrlWork'] ) ) { + $url = '<span class="url">' + . htmlspecialchars( $vals['CiUrlWork'] ) + . '</span>'; + } + return wfMsg( 'exif-contact-value', $email, $url, + $street, $city, $region, $postal, $country, + $tel ); + } + } +} + +/** For compatability with old FormatExif class + * which some extensions use. + * + * @deprecated since 1.18 + * +**/ +class FormatExif { + var $meta; + function FormatExif ( $meta ) { + wfDeprecated(__METHOD__); + $this->meta = $meta; + } + + function getFormattedData ( ) { + return FormatMetadata::getFormattedData( $this->meta ); + } +} diff --git a/includes/media/GIF.php b/includes/media/GIF.php index c4ede331..325ceb9a 100644 --- a/includes/media/GIF.php +++ b/includes/media/GIF.php @@ -12,56 +12,104 @@ * @ingroup Media */ class GIFHandler extends BitmapHandler { + + const BROKEN_FILE = '0'; // value to store in img_metadata if error extracting metadata. function getMetadata( $image, $filename ) { - if ( !isset( $image->parsedGIFMetadata ) ) { - try { - $image->parsedGIFMetadata = GIFMetadataExtractor::getMetadata( $filename ); - } catch( Exception $e ) { - // Broken file? - wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" ); - return '0'; - } + try { + $parsedGIFMetadata = BitmapMetadataHandler::GIF( $filename ); + } catch( Exception $e ) { + // Broken file? + wfDebug( __METHOD__ . ': ' . $e->getMessage() . 
"\n" ); + return self::BROKEN_FILE; } - return serialize( $image->parsedGIFMetadata ); - + return serialize($parsedGIFMetadata); } - + + /** + * @param $image File + * @return array|bool + */ function formatMetadata( $image ) { - return false; + $meta = $image->getMetadata(); + + if ( !$meta ) { + return false; + } + $meta = unserialize( $meta ); + if ( !isset( $meta['metadata'] ) || count( $meta['metadata'] ) <= 1 ) { + return false; + } + + if ( isset( $meta['metadata']['_MW_GIF_VERSION'] ) ) { + unset( $meta['metadata']['_MW_GIF_VERSION'] ); + } + return $this->formatMetadataHelper( $meta['metadata'] ); } - + + /** + * @param $image File + * @param $width + * @param $height + * @return + */ function getImageArea( $image, $width, $height ) { $ser = $image->getMetadata(); - if ($ser) { - $metadata = unserialize($ser); + if ( $ser ) { + $metadata = unserialize( $ser ); return $width * $height * $metadata['frameCount']; } else { return $width * $height; } } + /** + * @param $image File + * @return bool + */ function isAnimatedImage( $image ) { $ser = $image->getMetadata(); - if ($ser) { + if ( $ser ) { $metadata = unserialize($ser); - if( $metadata['frameCount'] > 1 ) return true; + if( $metadata['frameCount'] > 1 ) { + return true; + } } return false; } - + function getMetadataType( $image ) { return 'parsed-gif'; } - + function isMetadataValid( $image, $metadata ) { + if ( $metadata === self::BROKEN_FILE ) { + // Do not repetitivly regenerate metadata on broken file. + return self::METADATA_GOOD; + } + wfSuppressWarnings(); $data = unserialize( $metadata ); wfRestoreWarnings(); - return (boolean) $data; + + if ( !$data || !is_array( $data ) ) { + wfDebug(__METHOD__ . ' invalid GIF metadata' ); + return self::METADATA_BAD; + } + + if ( !isset( $data['metadata']['_MW_GIF_VERSION'] ) + || $data['metadata']['_MW_GIF_VERSION'] != GIFMetadataExtractor::VERSION ) { + wfDebug(__METHOD__ . 
' old but compatible GIF metadata' ); + return self::METADATA_COMPATIBLE; + } + return self::METADATA_GOOD; } + /** + * @param $image File + * @return string + */ function getLongDesc( $image ) { global $wgLang; @@ -71,20 +119,25 @@ class GIFHandler extends BitmapHandler { $metadata = unserialize($image->getMetadata()); wfRestoreWarnings(); - if (!$metadata || $metadata['frameCount'] <= 1) + if (!$metadata || $metadata['frameCount'] <= 1) { return $original; - + } + + /* Preserve original image info string, but strip the last char ')' so we can add even more */ $info = array(); $info[] = $original; - if ($metadata['looped']) + if ( $metadata['looped'] ) { $info[] = wfMsgExt( 'file-info-gif-looped', 'parseinline' ); + } - if ($metadata['frameCount'] > 1) + if ( $metadata['frameCount'] > 1 ) { $info[] = wfMsgExt( 'file-info-gif-frames', 'parseinline', $metadata['frameCount'] ); + } - if ($metadata['duration']) + if ( $metadata['duration'] ) { $info[] = $wgLang->formatTimePeriod( $metadata['duration'] ); + } return $wgLang->commaList( $info ); } diff --git a/includes/media/GIFMetadataExtractor.php b/includes/media/GIFMetadataExtractor.php index bc1a4804..5dbeb8f8 100644 --- a/includes/media/GIFMetadataExtractor.php +++ b/includes/media/GIFMetadataExtractor.php @@ -21,164 +21,294 @@ class GIFMetadataExtractor { static $gif_extension_sep; static $gif_term; + const VERSION = 1; + + // Each sub-block is less than or equal to 255 bytes. + // Most of the time its 255 bytes, except for in XMP + // blocks, where it's usually between 32-127 bytes each. + const MAX_SUBBLOCKS = 262144; // 5mb divided by 20. + + /** + * @throws Exception + * @param $filename string + * @return array + */ static function getMetadata( $filename ) { self::$gif_frame_sep = pack( "C", ord("," ) ); self::$gif_extension_sep = pack( "C", ord("!" 
) ); self::$gif_term = pack( "C", ord(";" ) ); - + $frameCount = 0; $duration = 0.0; $isLooped = false; + $xmp = ""; + $comment = array(); - if (!$filename) + if ( !$filename ) { throw new Exception( "No file name specified" ); - elseif ( !file_exists($filename) || is_dir($filename) ) + } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) { throw new Exception( "File $filename does not exist" ); - - $fh = fopen( $filename, 'r' ); - - if (!$fh) + } + + $fh = fopen( $filename, 'rb' ); + + if ( !$fh ) { throw new Exception( "Unable to open file $filename" ); - + } + // Check for the GIF header $buf = fread( $fh, 6 ); if ( !($buf == 'GIF87a' || $buf == 'GIF89a') ) { throw new Exception( "Not a valid GIF file; header: $buf" ); } - + // Skip over width and height. fread( $fh, 4 ); - + // Read BPP $buf = fread( $fh, 1 ); $bpp = self::decodeBPP( $buf ); - + // Skip over background and aspect ratio fread( $fh, 2 ); - + // Skip over the GCT self::readGCT( $fh, $bpp ); - + while( !feof( $fh ) ) { $buf = fread( $fh, 1 ); - + if ($buf == self::$gif_frame_sep) { // Found a frame $frameCount++; - + ## Skip bounding box fread( $fh, 8 ); - + ## Read BPP $buf = fread( $fh, 1 ); $bpp = self::decodeBPP( $buf ); - + ## Read GCT self::readGCT( $fh, $bpp ); fread( $fh, 1 ); self::skipBlock( $fh ); } elseif ( $buf == self::$gif_extension_sep ) { $buf = fread( $fh, 1 ); + if ( strlen( $buf ) < 1 ) throw new Exception( "Ran out of input" ); $extension_code = unpack( 'C', $buf ); $extension_code = $extension_code[1]; - + if ($extension_code == 0xF9) { // Graphics Control Extension. fread( $fh, 1 ); // Block size - + fread( $fh, 1 ); // Transparency, disposal method, user input - + $buf = fread( $fh, 2 ); // Delay, in hundredths of seconds. 
+ if ( strlen( $buf ) < 2 ) throw new Exception( "Ran out of input" ); $delay = unpack( 'v', $buf ); $delay = $delay[1]; $duration += $delay * 0.01; - + fread( $fh, 1 ); // Transparent colour index - + $term = fread( $fh, 1 ); // Should be a terminator + if ( strlen( $term ) < 1 ) throw new Exception( "Ran out of input" ); $term = unpack( 'C', $term ); $term = $term[1]; - if ($term != 0 ) + if ($term != 0 ) { throw new Exception( "Malformed Graphics Control Extension block" ); + } + } elseif ($extension_code == 0xFE) { + // Comment block(s). + $data = self::readBlock( $fh ); + if ( $data === "" ) { + throw new Exception( 'Read error, zero-length comment block' ); + } + + // The standard says this should be ASCII, however its unclear if + // thats true in practise. Check to see if its valid utf-8, if so + // assume its that, otherwise assume its windows-1252 (iso-8859-1) + $dataCopy = $data; + // quickIsNFCVerify has the side effect of replacing any invalid characters + UtfNormal::quickIsNFCVerify( $dataCopy ); + + if ( $dataCopy !== $data ) { + wfSuppressWarnings(); + $data = iconv( 'windows-1252', 'UTF-8', $data ); + wfRestoreWarnings(); + } + + $commentCount = count( $comment ); + if ( $commentCount === 0 + || $comment[$commentCount-1] !== $data ) + { + // Some applications repeat the same comment on each + // frame of an animated GIF image, so if this comment + // is identical to the last, only extract once. + $comment[] = $data; + } } elseif ($extension_code == 0xFF) { // Application extension (Netscape info about the animated gif) + // or XMP (or theoretically any other type of extension block) $blockLength = fread( $fh, 1 ); + if ( strlen( $blockLength ) < 1 ) throw new Exception( "Ran out of input" ); $blockLength = unpack( 'C', $blockLength ); $blockLength = $blockLength[1]; $data = fread( $fh, $blockLength ); - - // NETSCAPE2.0 (application name) - if ($blockLength != 11 || $data != 'NETSCAPE2.0') { + + if ($blockLength != 11 ) { + wfDebug( __METHOD__ . 
' GIF application block with wrong length' ); fseek( $fh, -($blockLength + 1), SEEK_CUR ); self::skipBlock( $fh ); continue; } + + // NETSCAPE2.0 (application name for animated gif) + if ( $data == 'NETSCAPE2.0' ) { - $data = fread( $fh, 2 ); // Block length and introduction, should be 03 01 - - if ($data != "\x03\x01") { - throw new Exception( "Expected \x03\x01, got $data" ); - } - - // Unsigned little-endian integer, loop count or zero for "forever" - $loopData = fread( $fh, 2 ); - $loopData = unpack( 'v', $loopData ); - $loopCount = $loopData[1]; - - if ($loopCount != 1) { - $isLooped = true; + $data = fread( $fh, 2 ); // Block length and introduction, should be 03 01 + + if ($data != "\x03\x01") { + throw new Exception( "Expected \x03\x01, got $data" ); + } + + // Unsigned little-endian integer, loop count or zero for "forever" + $loopData = fread( $fh, 2 ); + if ( strlen( $loopData ) < 2 ) throw new Exception( "Ran out of input" ); + $loopData = unpack( 'v', $loopData ); + $loopCount = $loopData[1]; + + if ($loopCount != 1) { + $isLooped = true; + } + + // Read out terminator byte + fread( $fh, 1 ); + } elseif ( $data == 'XMP DataXMP' ) { + // application name for XMP data. + // see pg 18 of XMP spec part 3. + + $xmp = self::readBlock( $fh, true ); + + if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE" + || substr( $xmp, -4 ) !== "\x03\x02\x01\x00" ) + { + // this is just a sanity check. + throw new Exception( "XMP does not have magic trailer!" ); + } + + // strip out trailer. + $xmp = substr( $xmp, 0, -257 ); + + } else { + // unrecognized extension block + fseek( $fh, -($blockLength + 1), SEEK_CUR ); + self::skipBlock( $fh ); + continue; } - - // Read out terminator byte - fread( $fh, 1 ); } else { self::skipBlock( $fh ); } } elseif ( $buf == self::$gif_term ) { break; } else { + if ( strlen( $buf ) < 1 ) throw new Exception( "Ran out of input" ); $byte = unpack( 'C', $buf ); $byte = $byte[1]; throw new Exception( "At position: ".ftell($fh). 
", Unknown byte ".$byte ); } } - + return array( 'frameCount' => $frameCount, 'looped' => $isLooped, - 'duration' => $duration + 'duration' => $duration, + 'xmp' => $xmp, + 'comment' => $comment, ); - } - + + /** + * @param $fh + * @param $bpp + * @return void + */ static function readGCT( $fh, $bpp ) { - if ($bpp > 0) { - for( $i=1; $i<=pow(2,$bpp); ++$i ) { + if ( $bpp > 0 ) { + for( $i=1; $i<=pow( 2, $bpp ); ++$i ) { fread( $fh, 3 ); } } } - + + /** + * @param $data + * @return int + */ static function decodeBPP( $data ) { + if ( strlen( $data ) < 1 ) throw new Exception( "Ran out of input" ); $buf = unpack( 'C', $data ); $buf = $buf[1]; $bpp = ( $buf & 7 ) + 1; $buf >>= 7; - + $have_map = $buf & 1; - + return $have_map ? $bpp : 0; } - + + /** + * @param $fh + * @return + */ static function skipBlock( $fh ) { while ( !feof( $fh ) ) { $buf = fread( $fh, 1 ); + if ( strlen( $buf ) < 1 ) throw new Exception( "Ran out of input" ); $block_len = unpack( 'C', $buf ); $block_len = $block_len[1]; - if ($block_len == 0) + if ($block_len == 0) { return; + } fread( $fh, $block_len ); } } + /** + * Read a block. In the GIF format, a block is made up of + * several sub-blocks. Each sub block starts with one byte + * saying how long the sub-block is, followed by the sub-block. + * The entire block is terminated by a sub-block of length + * 0. + * @param $fh FileHandle + * @param $includeLengths Boolean Include the length bytes of the + * sub-blocks in the returned value. Normally this is false, + * except XMP is weird and does a hack where you need to keep + * these length bytes. + * @return The data. + */ + static function readBlock( $fh, $includeLengths = false ) { + $data = ''; + $subLength = fread( $fh, 1 ); + $blocks = 0; + + while( $subLength !== "\0" ) { + $blocks++; + if ( $blocks > self::MAX_SUBBLOCKS ) { + throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" ); + } + if ( feof( $fh ) ) { + throw new Exception( "Read error: Unexpected EOF." 
); + } + if ( $includeLengths ) { + $data .= $subLength; + } + + $data .= fread( $fh, ord( $subLength ) ); + $subLength = fread( $fh, 1 ); + } + return $data; + } } diff --git a/includes/media/Generic.php b/includes/media/Generic.php index fa4e731a..48735ebf 100644 --- a/includes/media/Generic.php +++ b/includes/media/Generic.php @@ -13,7 +13,9 @@ */ abstract class MediaHandler { const TRANSFORM_LATER = 1; - + const METADATA_GOOD = true; + const METADATA_BAD = false; + const METADATA_COMPATIBLE = 2; // for old but backwards compatible. /** * Instance cache */ @@ -21,6 +23,10 @@ abstract class MediaHandler { /** * Get a MediaHandler for a given MIME type from the instance cache + * + * @param $type string + * + * @return MediaHandler */ static function getHandler( $type ) { global $wgMediaHandlers; @@ -44,20 +50,27 @@ abstract class MediaHandler { */ abstract function getParamMap(); - /* + /** * Validate a thumbnail parameter at parse time. * Return true to accept the parameter, and false to reject it. * If you return false, the parser will do something quiet and forgiving. + * + * @param $name + * @param $value */ abstract function validateParam( $name, $value ); /** * Merge a parameter array into a string appropriate for inclusion in filenames + * + * @param $params array */ abstract function makeParamString( $params ); /** * Parse a param string made with makeParamString back into an array + * + * @param $str string */ abstract function parseParamString( $str ); @@ -65,6 +78,8 @@ abstract class MediaHandler { * Changes the parameter array as necessary, ready for transformation. * Should be idempotent. * Returns false if the parameters are unacceptable and the transform should fail + * @param $image + * @param $params */ abstract function normaliseParams( $image, &$params ); @@ -89,15 +104,66 @@ abstract class MediaHandler { function getMetadata( $image, $path ) { return ''; } /** + * Get metadata version. 
+ * + * This is not used for validating metadata, this is used for the api when returning + * metadata, since api content formats should stay the same over time, and so things + * using ForiegnApiRepo can keep backwards compatibility + * + * All core media handlers share a common version number, and extensions can + * use the GetMetadataVersion hook to append to the array (they should append a unique + * string so not to get confusing). If there was a media handler named 'foo' with metadata + * version 3 it might add to the end of the array the element 'foo=3'. if the core metadata + * version is 2, the end version string would look like '2;foo=3'. + * + * @return string version string + */ + static function getMetadataVersion () { + $version = Array( '2' ); // core metadata version + wfRunHooks('GetMetadataVersion', Array(&$version)); + return implode( ';', $version); + } + + /** + * Convert metadata version. + * + * By default just returns $metadata, but can be used to allow + * media handlers to convert between metadata versions. + * + * @param $metadata Mixed String or Array metadata array (serialized if string) + * @param $version Integer target version + * @return Array serialized metadata in specified version, or $metadata on fail. + */ + function convertMetadataVersion( $metadata, $version = 1 ) { + if ( !is_array( $metadata ) ) { + + //unserialize to keep return parameter consistent. + wfSuppressWarnings(); + $ret = unserialize( $metadata ); + wfRestoreWarnings(); + return $ret; + } + return $metadata; + } + + /** * Get a string describing the type of metadata, for display purposes. + * + * @return string */ function getMetadataType( $image ) { return false; } /** * Check if the metadata string is valid for this handler. - * If it returns false, Image will reload the metadata from the file and update the database + * If it returns MediaHandler::METADATA_BAD (or false), Image + * will reload the metadata from the file and update the database. 
+ * MediaHandler::METADATA_GOOD for if the metadata is a-ok, + * MediaHanlder::METADATA_COMPATIBLE if metadata is old but backwards + * compatible (which may or may not trigger a metadata reload). */ - function isMetadataValid( $image, $metadata ) { return true; } + function isMetadataValid( $image, $metadata ) { + return self::METADATA_GOOD; + } /** @@ -142,6 +208,18 @@ abstract class MediaHandler { * @return array thumbnail extension and MIME type */ function getThumbType( $ext, $mime, $params = null ) { + $magic = MimeMagic::singleton(); + if ( !$ext || $magic->isMatchingExtension( $ext, $mime ) === false ) { + // The extension is not valid for this mime type and we do + // recognize the mime type + $extensions = $magic->getExtensionsForType( $mime ); + if ( $extensions ) { + return array( strtok( $extensions, ' ' ), $mime ); + } + } + + // The extension is correct (true) or the mime type is unknown to + // MediaWiki (null) return array( $ext, $mime ); } @@ -176,6 +254,8 @@ abstract class MediaHandler { * Currently "width" and "height" are understood, but this might be * expanded in the future. * Returns false if unknown or if the document is not multi-page. + * + * @param $image File */ function getPageDimensions( $image, $page ) { $gis = $this->getImageSize( $image, $image->getPath() ); @@ -213,7 +293,7 @@ abstract class MediaHandler { */ /** - * FIXME: I don't really like this interface, it's not very flexible + * @todo FIXME: I don't really like this interface, it's not very flexible * I think the media handler should generate HTML instead. It can do * all the formatting according to some standard. That makes it possible * to do things like visual indication of grouped and chained streams @@ -223,22 +303,104 @@ abstract class MediaHandler { return false; } + /** sorts the visible/invisible field. + * Split off from ImageHandler::formatMetadata, as used by more than + * one type of handler. 
+ * + * This is used by the media handlers that use the FormatMetadata class + * + * @param $metadataArray Array metadata array + * @return array for use displaying metadata. + */ + function formatMetadataHelper( $metadataArray ) { + $result = array( + 'visible' => array(), + 'collapsed' => array() + ); + + $formatted = FormatMetadata::getFormattedData( $metadataArray ); + // Sort fields into visible and collapsed + $visibleFields = $this->visibleMetadataFields(); + foreach ( $formatted as $name => $value ) { + $tag = strtolower( $name ); + self::addMeta( $result, + in_array( $tag, $visibleFields ) ? 'visible' : 'collapsed', + 'exif', + $tag, + $value + ); + } + return $result; + } + + /** + * Get a list of metadata items which should be displayed when + * the metadata table is collapsed. + * + * @return array of strings + * @access protected + */ + function visibleMetadataFields() { + $fields = array(); + $lines = explode( "\n", wfMsgForContent( 'metadata-fields' ) ); + foreach( $lines as $line ) { + $matches = array(); + if( preg_match( '/^\\*\s*(.*?)\s*$/', $line, $matches ) ) { + $fields[] = $matches[1]; + } + } + $fields = array_map( 'strtolower', $fields ); + return $fields; + } + + /** - * @todo Fixme: document this! - * 'value' thingy goes into a wikitext table; it used to be escaped but - * that was incompatible with previous practice of customized display + * This is used to generate an array element for each metadata value + * That array is then used to generate the table of metadata values + * on the image page + * + * @param &$array Array An array containing elements for each type of visibility + * and each of those elements being an array of metadata items. This function adds + * a value to that array. + * @param $visbility string ('visible' or 'collapsed') if this value is hidden + * by default. + * @param $type String type of metadata tag (currently always 'exif') + * @param $id String the name of the metadata tag (like 'artist' for example). 
+ * its name in the table displayed is the message "$type-$id" (Ex exif-artist ). + * @param $value String thingy goes into a wikitext table; it used to be escaped but + * that was incompatible with previous practise of customized display * with wikitext formatting via messages such as 'exif-model-value'. * So the escaping is taken back out, but generally this seems a confusing * interface. + * @param $param String value to pass to the message for the name of the field + * as $1. Currently this parameter doesn't seem to ever be used. + * + * Note, everything here is passed through the parser later on (!) */ protected static function addMeta( &$array, $visibility, $type, $id, $value, $param = false ) { + $msgName = "$type-$id"; + if ( wfEmptyMsg( $msgName ) ) { + // This is for future compatibility when using instant commons. + // So as to not display as ugly a name if a new metadata + // property is defined that we don't know about + // (not a major issue since such a property would be collapsed + // by default). + wfDebug( __METHOD__ . ' Unknown metadata name: ' . $id . 
"\n" ); + $name = wfEscapeWikiText( $id ); + } else { + $name = wfMsg( $msgName, $param ); + } $array[$visibility][] = array( 'id' => "$type-$id", - 'name' => wfMsg( "$type-$id", $param ), + 'name' => $name, 'value' => $value ); } + /** + * @param $file File + * @return string + */ function getShortDesc( $file ) { global $wgLang; $nbytes = wfMsgExt( 'nbytes', array( 'parsemag', 'escape' ), @@ -246,14 +408,21 @@ abstract class MediaHandler { return "$nbytes"; } + /** + * @param $file File + * @return string + */ function getLongDesc( $file ) { - global $wgUser; - $sk = $wgUser->getSkin(); + global $wgLang; return wfMsgExt( 'file-info', 'parseinline', - $sk->formatSize( $file->getSize() ), + $wgLang->formatSize( $file->getSize() ), $file->getMimeType() ); } - + + /** + * @param $file File + * @return string + */ static function getGeneralShortDesc( $file ) { global $wgLang; $nbytes = wfMsgExt( 'nbytes', array( 'parsemag', 'escape' ), @@ -261,11 +430,14 @@ abstract class MediaHandler { return "$nbytes"; } + /** + * @param $file File + * @return string + */ static function getGeneralLongDesc( $file ) { - global $wgUser; - $sk = $wgUser->getSkin(); + global $wgLang; return wfMsgExt( 'file-info', 'parseinline', - $sk->formatSize( $file->getSize() ), + $wgLang->formatSize( $file->getSize() ), $file->getMimeType() ); } @@ -281,10 +453,10 @@ abstract class MediaHandler { /** * File validation hook called on upload. * - * If the file at the given local path is not valid, or its MIME type does not + * If the file at the given local path is not valid, or its MIME type does not * match the handler class, a Status object should be returned containing * relevant errors. - * + * * @param $fileName The local path to the file. 
* @return Status object */ @@ -321,12 +493,13 @@ abstract class MediaHandler { * @ingroup Media */ abstract class ImageHandler extends MediaHandler { + + /** + * @param $file File + * @return bool + */ function canRender( $file ) { - if ( $file->getWidth() && $file->getHeight() ) { - return true; - } else { - return false; - } + return ( $file->getWidth() && $file->getHeight() ); } function getParamMap() { @@ -371,6 +544,11 @@ abstract class ImageHandler extends MediaHandler { return array( 'width' => $params['width'] ); } + /** + * @param $image File + * @param $params + * @return bool + */ function normaliseParams( $image, &$params ) { $mimeType = $image->getMimeType(); @@ -392,13 +570,44 @@ abstract class ImageHandler extends MediaHandler { $srcWidth = $image->getWidth( $params['page'] ); $srcHeight = $image->getHeight( $params['page'] ); + if ( isset( $params['height'] ) && $params['height'] != -1 ) { + # Height & width were both set if ( $params['width'] * $srcHeight > $params['height'] * $srcWidth ) { + # Height is the relative smaller dimension, so scale width accordingly $params['width'] = wfFitBoxWidth( $srcWidth, $srcHeight, $params['height'] ); + + if ( $params['width'] == 0 ) { + # Very small image, so we need to rely on client side scaling :( + $params['width'] = 1; + } + + $params['physicalWidth'] = $params['width']; + } else { + # Height was crap, unset it so that it will be calculated later + unset( $params['height'] ); } } - $params['height'] = File::scaleHeight( $srcWidth, $srcHeight, $params['width'] ); - if ( !$this->validateThumbParams( $params['width'], $params['height'], $srcWidth, $srcHeight, $mimeType ) ) { + + if ( !isset( $params['physicalWidth'] ) ) { + # Passed all validations, so set the physicalWidth + $params['physicalWidth'] = $params['width']; + } + + # Because thumbs are only referred to by width, the height always needs + # to be scaled by the width to keep the thumbnail sizes consistent, + # even if it was set inside the if 
block above + $params['physicalHeight'] = File::scaleHeight( $srcWidth, $srcHeight, + $params['physicalWidth'] ); + + # Set the height if it was not validated in the if block higher up + if ( !isset( $params['height'] ) || $params['height'] == -1 ) { + $params['height'] = $params['physicalHeight']; + } + + + if ( !$this->validateThumbParams( $params['physicalWidth'], + $params['physicalHeight'], $srcWidth, $srcHeight, $mimeType ) ) { return false; } return true; @@ -435,9 +644,19 @@ abstract class ImageHandler extends MediaHandler { } $height = File::scaleHeight( $srcWidth, $srcHeight, $width ); + if ( $height == 0 ) { + # Force height to be at least 1 pixel + $height = 1; + } return true; } + /** + * @param $image File + * @param $script + * @param $params + * @return bool|ThumbnailImage + */ function getScriptedTransform( $image, $script, $params ) { if ( !$this->normaliseParams( $image, $params ) ) { return false; @@ -461,6 +680,10 @@ abstract class ImageHandler extends MediaHandler { return false; } + /** + * @param $file File + * @return string + */ function getShortDesc( $file ) { global $wgLang; $nbytes = wfMsgExt( 'nbytes', array( 'parsemag', 'escape' ), @@ -470,15 +693,34 @@ abstract class ImageHandler extends MediaHandler { return "$widthheight ($nbytes)"; } + /** + * @param $file File + * @return string + */ function getLongDesc( $file ) { global $wgLang; - return wfMsgExt('file-info-size', 'parseinline', - $wgLang->formatNum( $file->getWidth() ), - $wgLang->formatNum( $file->getHeight() ), - $wgLang->formatSize( $file->getSize() ), - $file->getMimeType() ); + $pages = $file->pageCount(); + if ( $pages === false || $pages <= 1 ) { + $msg = wfMsgExt('file-info-size', 'parseinline', + $wgLang->formatNum( $file->getWidth() ), + $wgLang->formatNum( $file->getHeight() ), + $wgLang->formatSize( $file->getSize() ), + $file->getMimeType() ); + } else { + $msg = wfMsgExt('file-info-size-pages', 'parseinline', + $wgLang->formatNum( $file->getWidth() ), + 
$wgLang->formatNum( $file->getHeight() ), + $wgLang->formatSize( $file->getSize() ), + $file->getMimeType(), + $wgLang->formatNum( $pages ) ); + } + return $msg; } + /** + * @param $file File + * @return string + */ function getDimensionsString( $file ) { global $wgLang; $pages = $file->pageCount(); diff --git a/includes/media/IPTC.php b/includes/media/IPTC.php new file mode 100644 index 00000000..1d19791c --- /dev/null +++ b/includes/media/IPTC.php @@ -0,0 +1,576 @@ +<?php +/** +*Class for some IPTC functions. + +*/ +class IPTC { + + /** + * This takes the results of iptcparse() and puts it into a + * form that can be handled by mediawiki. Generally called from + * BitmapMetadataHandler::doApp13. + * + * @see http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf + * + * @param $rawData String app13 block from jpeg containing iptc/iim data + * @return Array iptc metadata array + */ + static function parse( $rawData ) { + $parsed = iptcparse( $rawData ); + $data = Array(); + if (!is_array($parsed)) { + return $data; + } + + $c = ''; + //charset info contained in tag 1:90. + if (isset($parsed['1#090']) && isset($parsed['1#090'][0])) { + $c = self::getCharset($parsed['1#090'][0]); + if ($c === false) { + //Unknown charset. refuse to parse. + //note: There is a different between + //unknown and no charset specified. + return array(); + } + unset( $parsed['1#090'] ); + } + + foreach ( $parsed as $tag => $val ) { + if ( isset( $val[0] ) && trim($val[0]) == '' ) { + wfDebugLog('iptc', "IPTC tag $tag had only whitespace as its value."); + continue; + } + switch( $tag ) { + case '2#120': /*IPTC caption. mapped with exif ImageDescription*/ + $data['ImageDescription'] = self::convIPTC( $val, $c ); + break; + case '2#116': /* copyright. Mapped with exif copyright */ + $data['Copyright'] = self::convIPTC( $val, $c ); + break; + case '2#080': /* byline. Mapped with exif Artist */ + /* merge with byline title (2:85) + * like how exif does it with + * Title, person. 
Not sure if this is best + * approach since we no longer have the two fields + * separate. each byline title entry corresponds to a + * specific byline. */ + + $bylines = self::convIPTC( $val, $c ); + if ( isset( $parsed['2#085'] ) ) { + $titles = self::convIPTC( $parsed['2#085'], $c ); + } else { + $titles = array(); + } + + for ( $i = 0; $i < count( $titles ); $i++ ) { + if ( isset( $bylines[$i] ) ) { + // theoretically this should always be set + // but doesn't hurt to be careful. + $bylines[$i] = $titles[$i] . ', ' . $bylines[$i]; + } + } + $data['Artist'] = $bylines; + break; + case '2#025': /* keywords */ + $data['Keywords'] = self::convIPTC( $val, $c ); + break; + case '2#101': /* Country (shown)*/ + $data['CountryDest'] = self::convIPTC( $val, $c ); + break; + case '2#095': /* state/province (shown) */ + $data['ProvinceOrStateDest'] = self::convIPTC( $val, $c ); + break; + case '2#090': /* city (Shown) */ + $data['CityDest'] = self::convIPTC( $val, $c ); + break; + case '2#092': /* sublocation (shown) */ + $data['SublocationDest'] = self::convIPTC( $val, $c ); + break; + case '2#005': /* object name/title */ + $data['ObjectName'] = self::convIPTC( $val, $c ); + break; + case '2#040': /* special instructions */ + $data['SpecialInstructions'] = self::convIPTC( $val, $c ); + break; + case '2#105': /* headline*/ + $data['Headline'] = self::convIPTC( $val, $c ); + break; + case '2#110': /* credit */ + /*"Identifies the provider of the objectdata, + * not necessarily the owner/creator". */ + $data['Credit'] = self::convIPTC( $val, $c ); + break; + case '2#115': /* source */ + /* "Identifies the original owner of the intellectual content of the + *objectdata. This could be an agency, a member of an agency or + *an individual." */ + $data['Source'] = self::convIPTC( $val, $c ); + break; + + case '2#007': /* edit status (lead, correction, etc) */ + $data['EditStatus'] = self::convIPTC( $val, $c ); + break; + case '2#015': /* category. deprecated. 
max 3 letters in theory, often more */ + $data['iimCategory'] = self::convIPTC( $val, $c ); + break; + case '2#020': /* category. deprecated. */ + $data['iimSupplementalCategory'] = self::convIPTC( $val, $c ); + break; + case '2#010': /*urgency (1-8. 1 most, 5 normal, 8 low priority)*/ + $data['Urgency'] = self::convIPTC( $val, $c ); + break; + case '2#022': + /* "Identifies objectdata that recurs often and predictably... + * Example: Euroweather" */ + $data['FixtureIdentifier'] = self::convIPTC( $val, $c ); + break; + case '2#026': + /* Content location code (iso 3166 + some custom things) + * ex: TUR (for turkey), XUN (for UN), XSP (outer space) + * See wikipedia article on iso 3166 and appendix D of iim std. */ + $data['LocationDestCode'] = self::convIPTC( $val, $c ); + break; + case '2#027': + /* Content location name. Full printable name + * of location of photo. */ + $data['LocationDest'] = self::convIPTC( $val, $c ); + break; + case '2#065': + /* Originating Program. + * Combine with Program version (2:70) if present. + */ + $software = self::convIPTC( $val, $c ); + + if ( count( $software ) !== 1 ) { + //according to iim standard this cannot have multiple values + //so if there is more than one, something weird is happening, + //and we skip it. + wfDebugLog( 'iptc', 'IPTC: Wrong count on 2:65 Software field' ); + break; + } + + if ( isset( $parsed['2#070'] ) ) { + //if a version is set for the software. + $softwareVersion = self::convIPTC( $parsed['2#070'], $c ); + unset($parsed['2#070']); + $data['Software'] = array( array( $software[0], $softwareVersion[0] ) ); + } else { + $data['Software'] = $software; + } + break; + case '2#075': + /* Object cycle. + * a for morning (am), p for evening, b for both */ + $data['ObjectCycle'] = self::convIPTC( $val, $c ); + break; + case '2#100': + /* Country/Primary location code. 
+ * "Indicates the code of the country/primary location where the + * intellectual property of the objectdata was created" + * unclear how this differs from 2#026 + */ + $data['CountryCodeDest'] = self::convIPTC( $val, $c ); + break; + case '2#103': + /* original transmission ref. + * "A code representing the location of original transmission ac- + * cording to practises of the provider." + */ + $data['OriginalTransmissionRef'] = self::convIPTC( $val, $c ); + break; + case '2#118': /*contact*/ + $data['Contact'] = self::convIPTC( $val, $c ); + break; + case '2#122': + /* Writer/Editor + * "Identification of the name of the person involved in the writing, + * editing or correcting the objectdata or caption/abstract." + */ + $data['Writer'] = self::convIPTC( $val, $c ); + break; + case '2#135': /* lang code */ + $data['LanguageCode'] = self::convIPTC( $val, $c ); + break; + + // Start date stuff. + // It doesn't accept incomplete dates even though they are valid + // according to spec. + // Should potentially store timezone as well. + case '2#055': + //Date created (not date digitized). + //Maps to exif DateTimeOriginal + if ( isset( $parsed['2#060'] ) ) { + $time = $parsed['2#060']; + } else { + $time = Array(); + } + $timestamp = self::timeHelper( $val, $time, $c ); + if ($timestamp) { + $data['DateTimeOriginal'] = $timestamp; + } + break; + + case '2#062': + //Date converted to digital representation. + //Maps to exif DateTimeDigitized + if ( isset( $parsed['2#063'] ) ) { + $time = $parsed['2#063']; + } else { + $time = Array(); + } + $timestamp = self::timeHelper( $val, $time, $c ); + if ($timestamp) { + $data['DateTimeDigitized'] = $timestamp; + } + break; + + case '2#030': + //Date released. + if ( isset( $parsed['2#035'] ) ) { + $time = $parsed['2#035']; + } else { + $time = Array(); + } + $timestamp = self::timeHelper( $val, $time, $c ); + if ($timestamp) { + $data['DateTimeReleased'] = $timestamp; + } + break; + + case '2#037': + //Date expires. 
+ if ( isset( $parsed['2#038'] ) ) { + $time = $parsed['2#038']; + } else { + $time = Array(); + } + $timestamp = self::timeHelper( $val, $time, $c ); + if ($timestamp) { + $data['DateTimeExpires'] = $timestamp; + } + break; + + case '2#000': /* iim version */ + // unlike other tags, this is a 2-byte binary number. + //technically this is required if there is iptc data + //but in practise it isn't always there. + if ( strlen( $val[0] ) == 2 ) { + //if is just to be paranoid. + $versionValue = ord( substr( $val[0], 0, 1 ) ) * 256; + $versionValue += ord( substr( $val[0], 1, 1 ) ); + $data['iimVersion'] = $versionValue; + } + break; + + case '2#004': + // IntellectualGenere. + // first 4 characters are an id code + // That we're not really interested in. + + // This prop is weird, since it's + // allowed to have multiple values + // in iim 4.1, but not in the XMP + // stuff. We're going to just + // extract the first value. + $con = self::ConvIPTC( $val, $c ); + if ( strlen( $con[0] ) < 5 ) { + wfDebugLog( 'iptc', 'IPTC: ' + . '2:04 too short. ' + . 'Ignoring.' ); + break; + } + $extracted = substr( $con[0], 4 ); + $data['IntellectualGenre'] = $extracted; + break; + + case '2#012': + // Subject News code - this is a compound field + // at the moment we only extract the subject news + // code, which is an 8 digit (ascii) number + // describing the subject matter of the content. + $codes = self::convIPTC( $val, $c ); + foreach ( $codes as $ic ) { + $fields = explode(':', $ic, 3 ); + + if ( count( $fields ) < 2 || + $fields[0] !== 'IPTC' ) + { + wfDebugLog( 'IPTC', 'IPTC: ' + . 'Invalid 2:12 - ' . $ic ); + break; + } + $data['SubjectNewsCode'] = $fields[1]; + } + break; + + // purposely does not do 2:125, 2:130, 2:131, + // 2:47, 2:50, 2:45, 2:42, 2:8, 2:3 + // 2:200, 2:201, 2:202 + // or the audio stuff (2:150 to 2:154) + + case '2#070': + case '2#060': + case '2#063': + case '2#085': + case '2#038': + case '2#035': + //ignore. Handled elsewhere. 
+ break; + + default: + wfDebugLog( 'iptc', "Unsupported iptc tag: $tag. Value: " . implode( ',', $val )); + break; + } + + } + return $data; + } + + /** + * Convert an iptc date and time tags into the exif format + * + * @todo Potentially this should also capture the timezone offset. + * @param Array $date The date tag + * @param Array $time The time tag + * @param $c + * @return String Date in exif format. + */ + private static function timeHelper( $date, $time, $c ) { + if ( count( $date ) === 1 ) { + //the standard says this should always be 1 + //just double checking. + list($date) = self::convIPTC( $date, $c ); + } else { + return null; + } + + if ( count( $time ) === 1 ) { + list($time) = self::convIPTC( $time, $c ); + $dateOnly = false; + } else { + $time = '000000+0000'; //placeholder + $dateOnly = true; + } + + if ( ! ( preg_match('/\d\d\d\d\d\d[-+]\d\d\d\d/', $time) + && preg_match('/\d\d\d\d\d\d\d\d/', $date) + && substr($date, 0, 4) !== '0000' + && substr($date, 4, 2) !== '00' + && substr($date, 6, 2) !== '00' + ) ) { + //something wrong. + // Note, this rejects some valid dates according to iptc spec + // for example: the date 00000400 means the photo was taken in + // April, but the year and day is unknown. We don't process these + // types of incomplete dates atm. + wfDebugLog( 'iptc', "IPTC: invalid time ( $time ) or date ( $date )"); + return null; + } + + $unixTS = wfTimestamp( TS_UNIX, $date . substr( $time, 0, 6 )); + if ( $unixTS === false ) { + wfDebugLog( 'iptc', "IPTC: can't convert date to TS_UNIX: $date $time." ); + return null; + } + + $tz = ( intval( substr( $time, 7, 2 ) ) *60*60 ) + + ( intval( substr( $time, 9, 2 ) ) * 60 ); + + if ( substr( $time, 6, 1 ) === '-' ) { + $tz = - $tz; + } + + $finalTimestamp = wfTimestamp( TS_EXIF, $unixTS + $tz ); + if ( $finalTimestamp === false ) { + wfDebugLog( 'iptc', "IPTC: can't make final timestamp. Date: " . 
( $unixTS + $tz ) ); + return null; + } + if ( $dateOnly ) { + //return the date only + return substr( $finalTimestamp, 0, 10 ); + } else { + return $finalTimestamp; + } + } + + /** + * Helper function to convert charset for iptc values. + * @param $data Mixed String or Array: The iptc string + * @param $charset String: The charset + * + * @return string + */ + private static function convIPTC ( $data, $charset ) { + if ( is_array( $data ) ) { + foreach ($data as &$val) { + $val = self::convIPTCHelper( $val, $charset ); + } + } else { + $data = self::convIPTCHelper( $data, $charset ); + } + + return $data; + } + /** + * Helper function of a helper function to convert charset for iptc values. + * @param $data Mixed String or Array: The iptc string + * @param $charset String: The charset + * + * @return string + */ + private static function convIPTCHelper ( $data, $charset ) { + if ( $charset ) { + wfSuppressWarnings(); + $data = iconv($charset, "UTF-8//IGNORE", $data); + wfRestoreWarnings(); + if ($data === false) { + $data = ""; + wfDebugLog('iptc', __METHOD__ . " Error converting iptc data charset $charset to utf-8"); + } + } else { + //treat as utf-8 if is valid utf-8. otherwise pretend its windows-1252 + // most of the time if there is no 1:90 tag, it is either ascii, latin1, or utf-8 + $oldData = $data; + UtfNormal::quickIsNFCVerify( $data ); //make $data valid utf-8 + if ($data === $oldData) { + return $data; //if validation didn't change $data + } else { + return self::convIPTCHelper( $oldData, 'Windows-1252' ); + } + } + return trim( $data ); + } + + /** + * take the value of 1:90 tag and returns a charset + * @param String $tag 1:90 tag. + * @return string charset name or "?" + * Warning, this function does not (and is not intended to) detect + * all iso 2022 escape codes. In practise, the code for utf-8 is the + * only code that seems to have wide use. It does detect that code. 
+ */ + static function getCharset($tag) { + + //According to iim standard, charset is defined by the tag 1:90. + //in which there are iso 2022 escape sequences to specify the character set. + //the iim standard seems to encourage that all necessary escape sequences are + //in the 1:90 tag, but says it doesn't have to be. + + //This is in need of more testing probably. This is definitely not complete. + //however reading the docs of some other iptc software, it appears that most iptc software + //only recognizes utf-8. If 1:90 tag is not present content is + // usually ascii or iso-8859-1 (and sometimes utf-8), but no guarantee. + + //This also won't work if there are more than one escape sequence in the 1:90 tag + //or if something is put in the G2, or G3 charsets, etc. It will only reliably recognize utf-8. + + // This is just going through the charsets mentioned in appendix C of the iim standard. + + // \x1b = ESC. + switch ( $tag ) { + case "\x1b%G": //utf-8 + //Also call things that are compatible with utf-8, utf-8 (e.g. ascii) + case "\x1b(B": // ascii + case "\x1b(@": // iso-646-IRV (ascii in latest version, $ different in older version) + $c = 'UTF-8'; + break; + case "\x1b(A": //like ascii, but british. 
+ $c = 'ISO646-GB'; + break; + case "\x1b(C": //some obscure sweedish/finland encoding + $c = 'ISO-IR-8-1'; + break; + case "\x1b(D": + $c = 'ISO-IR-8-2'; + break; + case "\x1b(E": //some obscure danish/norway encoding + $c = 'ISO-IR-9-1'; + break; + case "\x1b(F": + $c = 'ISO-IR-9-2'; + break; + case "\x1b(G": + $c = 'SEN_850200_B'; // aka iso 646-SE; ascii-like + break; + case "\x1b(I": + $c = "ISO646-IT"; + break; + case "\x1b(L": + $c = "ISO646-PT"; + break; + case "\x1b(Z": + $c = "ISO646-ES"; + break; + case "\x1b([": + $c = "GREEK7-OLD"; + break; + case "\x1b(K": + $c = "ISO646-DE"; + break; + case "\x1b(N": //crylic + $c = "ISO_5427"; + break; + case "\x1b(`": //iso646-NO + $c = "NS_4551-1"; + break; + case "\x1b(f": //iso646-FR + $c = "NF_Z_62-010"; + break; + case "\x1b(g": + $c = "PT2"; //iso646-PT2 + break; + case "\x1b(h": + $c = "ES2"; + break; + case "\x1b(i": //iso646-HU + $c = "MSZ_7795.3"; + break; + case "\x1b(w": + $c = "CSA_Z243.4-1985-1"; + break; + case "\x1b(x": + $c = "CSA_Z243.4-1985-2"; + break; + case "\x1b\$(B": + case "\x1b\$B": + case "\x1b&@\x1b\$B": + case "\x1b&@\x1b\$(B": + $c = "JIS_C6226-1983"; + break; + case "\x1b-A": // iso-8859-1. at least for the high code characters. + case "\x1b(@\x1b-A": + case "\x1b(B\x1b-A": + $c = 'ISO-8859-1'; + break; + case "\x1b-B": // iso-8859-2. at least for the high code characters. + $c = 'ISO-8859-2'; + break; + case "\x1b-C": // iso-8859-3. at least for the high code characters. + $c = 'ISO-8859-3'; + break; + case "\x1b-D": // iso-8859-4. at least for the high code characters. + $c = 'ISO-8859-4'; + break; + case "\x1b-E": // iso-8859-5. at least for the high code characters. + $c = 'ISO-8859-5'; + break; + case "\x1b-F": // iso-8859-6. at least for the high code characters. + $c = 'ISO-8859-6'; + break; + case "\x1b-G": // iso-8859-7. at least for the high code characters. + $c = 'ISO-8859-7'; + break; + case "\x1b-H": // iso-8859-8. at least for the high code characters. 
+ $c = 'ISO-8859-8'; + break; + case "\x1b-I": // CSN_369103. at least for the high code characters. + $c = 'CSN_369103'; + break; + default: + wfDebugLog('iptc', __METHOD__ . 'Unknown charset in iptc 1:90: ' . bin2hex( $tag ) ); + //at this point just give up and refuse to parse iptc? + $c = false; + } + return $c; + } +} diff --git a/includes/media/Jpeg.php b/includes/media/Jpeg.php new file mode 100644 index 00000000..7033409b --- /dev/null +++ b/includes/media/Jpeg.php @@ -0,0 +1,46 @@ +<?php +/** + * @file + * @ingroup Media + */ + +/** + * JPEG specific handler. + * Inherits most stuff from BitmapHandler, just here to do the metadata handler differently. + * + * Metadata stuff common to Jpeg and built-in Tiff (not PagedTiffHandler) is + * in ExifBitmapHandler. + * + * @ingroup Media + */ +class JpegHandler extends ExifBitmapHandler { + + function getMetadata ( $image, $filename ) { + try { + $meta = BitmapMetadataHandler::Jpeg( $filename ); + if ( !is_array( $meta ) ) { + // This should never happen, but doesn't hurt to be paranoid. + throw new MWException('Metadata array is not an array'); + } + $meta['MEDIAWIKI_EXIF_VERSION'] = Exif::version(); + return serialize( $meta ); + } + catch ( MWException $e ) { + // BitmapMetadataHandler throws an exception in certain exceptional cases like if file does not exist. + wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" ); + + /* This used to use 0 (ExifBitmapHandler::OLD_BROKEN_FILE) for the cases + * * No metadata in the file + * * Something is broken in the file. + * However, if the metadata support gets expanded then you can't tell if the 0 is from + * a broken file, or just no props found. A broken file is likely to stay broken, but + * a file which had no props could have props once the metadata support is improved. + * Thus switch to using -1 to denote only a broken file, and use an array with only + * MEDIAWIKI_EXIF_VERSION to denote no props. 
+ */ + return ExifBitmapHandler::BROKEN_FILE; + } + } + +} + diff --git a/includes/media/JpegMetadataExtractor.php b/includes/media/JpegMetadataExtractor.php new file mode 100644 index 00000000..4769bf8e --- /dev/null +++ b/includes/media/JpegMetadataExtractor.php @@ -0,0 +1,252 @@ +<?php +/** +* Class for reading jpegs and extracting metadata. +* see also BitmapMetadataHandler. +* +* Based somewhat on GIFMetadataExtrator. +*/ +class JpegMetadataExtractor { + + const MAX_JPEG_SEGMENTS = 200; + // the max segment is a sanity check. + // A jpeg file should never even remotely have + // that many segments. Your average file has about 10. + + /** Function to extract metadata segments of interest from jpeg files + * based on GIFMetadataExtractor. + * + * we can almost use getimagesize to do this + * but gis doesn't support having multiple app1 segments + * and those can't extract xmp on files containing both exif and xmp data + * + * @param String $filename name of jpeg file + * @return Array of interesting segments. + * @throws MWException if given invalid file. + */ + static function segmentSplitter ( $filename ) { + $showXMP = function_exists( 'xml_parser_create_ns' ); + + $segmentCount = 0; + + $segments = array( + 'XMP_ext' => array(), + 'COM' => array(), + ); + + if ( !$filename ) { + throw new MWException( "No filename specified for " . __METHOD__ ); + } + if ( !file_exists( $filename ) || is_dir( $filename ) ) { + throw new MWException( "Invalid file $filename passed to " . __METHOD__ ); + } + + $fh = fopen( $filename, "rb" ); + + if ( !$fh ) { + throw new MWException( "Could not open file $filename" ); + } + + $buffer = fread( $fh, 2 ); + if ( $buffer !== "\xFF\xD8" ) { + throw new MWException( "Not a jpeg, no SOI" ); + } + while ( !feof( $fh ) ) { + $buffer = fread( $fh, 1 ); + $segmentCount++; + if ( $segmentCount > self::MAX_JPEG_SEGMENTS ) { + // this is just a sanity check + throw new MWException( 'Too many jpeg segments. 
Aborting' ); + } + if ( $buffer !== "\xFF" ) { + throw new MWException( "Error reading jpeg file marker. Expected 0xFF but got " . bin2hex( $buffer ) ); + } + + $buffer = fread( $fh, 1 ); + while( $buffer === "\xFF" && !feof( $fh ) ) { + // Skip through any 0xFF padding bytes. + $buffer = fread( $fh, 1 ); + } + if ( $buffer === "\xFE" ) { + + // COM section -- file comment + // First see if valid utf-8, + // if not try to convert it to windows-1252. + $com = $oldCom = trim( self::jpegExtractMarker( $fh ) ); + UtfNormal::quickIsNFCVerify( $com ); + // turns $com to valid utf-8. + // thus if no change, its utf-8, otherwise its something else. + if ( $com !== $oldCom ) { + wfSuppressWarnings(); + $com = $oldCom = iconv( 'windows-1252', 'UTF-8//IGNORE', $oldCom ); + wfRestoreWarnings(); + } + // Try it again, if its still not a valid string, then probably + // binary junk or some really weird encoding, so don't extract. + UtfNormal::quickIsNFCVerify( $com ); + if ( $com === $oldCom ) { + $segments["COM"][] = $oldCom; + } else { + wfDebug( __METHOD__ . ' Ignoring JPEG comment as is garbage.' ); + } + + } elseif ( $buffer === "\xE1" ) { + // APP1 section (Exif, XMP, and XMP extended) + // only extract if XMP is enabled. + $temp = self::jpegExtractMarker( $fh ); + // check what type of app segment this is. + if ( substr( $temp, 0, 29 ) === "http://ns.adobe.com/xap/1.0/\x00" && $showXMP ) { + $segments["XMP"] = substr( $temp, 29 ); + } elseif ( substr( $temp, 0, 35 ) === "http://ns.adobe.com/xmp/extension/\x00" && $showXMP ) { + $segments["XMP_ext"][] = substr( $temp, 35 ); + } elseif ( substr( $temp, 0, 29 ) === "XMP\x00://ns.adobe.com/xap/1.0/\x00" && $showXMP ) { + // Some images (especially flickr images) seem to have this. + // I really have no idea what the deal is with them, but + // whatever... + $segments["XMP"] = substr( $temp, 29 ); + wfDebug( __METHOD__ . ' Found XMP section with wrong app identifier ' + . 
"Using anyways.\n" ); + } elseif ( substr( $temp, 0, 6 ) === "Exif\0\0" ) { + // Just need to find out what the byte order is. + // because php's exif plugin sucks... + // This is a II for little Endian, MM for big. Not a unicode BOM. + $byteOrderMarker = substr( $temp, 6, 2 ); + if ( $byteOrderMarker === 'MM' ) { + $segments['byteOrder'] = 'BE'; + } elseif ( $byteOrderMarker === 'II' ) { + $segments['byteOrder'] = 'LE'; + } else { + wfDebug( __METHOD__ . ' Invalid byte ordering?!' ); + } + } + } elseif ( $buffer === "\xED" ) { + // APP13 - PSIR. IPTC and some photoshop stuff + $temp = self::jpegExtractMarker( $fh ); + if ( substr( $temp, 0, 14 ) === "Photoshop 3.0\x00" ) { + $segments["PSIR"] = $temp; + } + } elseif ( $buffer === "\xD9" || $buffer === "\xDA" ) { + // EOI - end of image or SOS - start of scan. either way we're past any interesting segments + return $segments; + } else { + // segment we don't care about, so skip + $size = wfUnpack( "nint", fread( $fh, 2 ), 2 ); + if ( $size['int'] <= 2 ) throw new MWException( "invalid marker size in jpeg" ); + fseek( $fh, $size['int'] - 2, SEEK_CUR ); + } + + } + // shouldn't get here. + throw new MWException( "Reached end of jpeg file unexpectedly" ); + } + + /** + * Helper function for jpegSegmentSplitter + * @param &$fh FileHandle for jpeg file + * @return data content of segment. + */ + private static function jpegExtractMarker( &$fh ) { + $size = wfUnpack( "nint", fread( $fh, 2 ), 2 ); + if ( $size['int'] <= 2 ) throw new MWException( "invalid marker size in jpeg" ); + $segment = fread( $fh, $size['int'] - 2 ); + if ( strlen( $segment ) !== $size['int'] - 2 ) throw new MWException( "Segment shorter than expected" ); + return $segment; + } + + /** + * This reads the photoshop image resource. + * Currently it only compares the iptc/iim hash + * with the stored hash, which is used to determine the precedence + * of the iptc data. In future it may extract some other info, like + * url of copyright license. 
+ * + * This should generally be called by BitmapMetadataHandler::doApp13() + * + * @param String $app13 photoshop psir app13 block from jpg. + * @return String if the iptc hash is good or not. + */ + public static function doPSIR ( $app13 ) { + if ( !$app13 ) { + return; + } + // First compare hash with real thing + // 0x404 contains IPTC, 0x425 has hash + // This is used to determine if the iptc is newer than + // the xmp data, as xmp programs update the hash, + // where non-xmp programs don't. + + $offset = 14; // skip past PHOTOSHOP 3.0 identifier. should already be checked. + $appLen = strlen( $app13 ); + $realHash = ""; + $recordedHash = ""; + + // the +12 is the length of an empty item. + while ( $offset + 12 <= $appLen ) { + $valid = true; + if ( substr( $app13, $offset, 4 ) !== '8BIM' ) { + // its supposed to be 8BIM + // but apparently sometimes isn't esp. in + // really old jpg's + $valid = false; + } + $offset += 4; + $id = substr( $app13, $offset, 2 ); + // id is a 2 byte id number which identifies + // the piece of info this record contains. + + $offset += 2; + + // some record types can contain a name, which + // is a pascal string 0-padded to be an even + // number of bytes. Most times (and any time + // we care) this is empty, making it two null bytes. + + $lenName = ord( substr( $app13, $offset, 1 ) ) + 1; + // we never use the name so skip it. +1 for length byte + if ( $lenName % 2 == 1 ) { + $lenName++; + } // pad to even. + $offset += $lenName; + + // now length of data (unsigned long big endian) + $lenData = wfUnpack( 'Nlen', substr( $app13, $offset, 4 ), 4 ); + // PHP can take issue with very large unsigned ints and make them negative. + // Which should never ever happen, as this has to be inside a segment + // which is limited to a 16 bit number. + if ( $lenData['len'] < 0 ) throw new MWException( "Too big PSIR (" . $lenData['len'] . ')' ); + + $offset += 4; // 4bytes length field; + + // this should not happen, but check. 
+ if ( $lenData['len'] + $offset > $appLen ) { + wfDebug( __METHOD__ . " PSIR data too long.\n" ); + return 'iptc-no-hash'; + } + + if ( $valid ) { + switch ( $id ) { + case "\x04\x04": + // IPTC block + $realHash = md5( substr( $app13, $offset, $lenData['len'] ), true ); + break; + case "\x04\x25": + $recordedHash = substr( $app13, $offset, $lenData['len'] ); + break; + } + } + + // if odd, add 1 to length to account for + // null pad byte. + if ( $lenData['len'] % 2 == 1 ) $lenData['len']++; + $offset += $lenData['len']; + + } + + if ( !$realHash || !$recordedHash ) { + return 'iptc-no-hash'; + } elseif ( $realHash === $recordedHash ) { + return 'iptc-good-hash'; + } else { /*$realHash !== $recordedHash */ + return 'iptc-bad-hash'; + } + } +} diff --git a/includes/media/MediaTransformOutput.php b/includes/media/MediaTransformOutput.php index c441f06c..f170bb9d 100644 --- a/includes/media/MediaTransformOutput.php +++ b/includes/media/MediaTransformOutput.php @@ -12,7 +12,12 @@ * @ingroup Media */ abstract class MediaTransformOutput { - var $file, $width, $height, $url, $page, $path; + /** + * @var File + */ + var $file; + + var $width, $height, $url, $page, $path; /** * Get the width of the output box @@ -45,7 +50,7 @@ abstract class MediaTransformOutput { /** * Fetch HTML for this transform output * - * @param $options Associative array of options. Boolean options + * @param $options array Associative array of options. Boolean options * should be indicated with a value of true for true, and false or * absent for false. 
* @@ -73,6 +78,11 @@ abstract class MediaTransformOutput { /** * Wrap some XHTML text in an anchor tag with the given attributes + * + * @param $linkAttribs array + * @param $contents string + * + * @return string */ protected function linkWrap( $linkAttribs, $contents ) { if ( $linkAttribs ) { @@ -82,6 +92,11 @@ abstract class MediaTransformOutput { } } + /** + * @param $title string + * @param $params array + * @return array + */ function getDescLinkAttribs( $title = null, $params = '' ) { $query = $this->page ? ( 'page=' . urlencode( $this->page ) ) : ''; if( $params ) { @@ -98,7 +113,6 @@ abstract class MediaTransformOutput { } } - /** * Media transform output for images * @@ -131,7 +145,7 @@ class ThumbnailImage extends MediaTransformOutput { * Return HTML <img ... /> tag for the thumbnail, will include * width and height attributes and a blank alt text (as required). * - * @param $options Associative array of options. Boolean options + * @param $options array Associative array of options. Boolean options * should be indicated with a value of true for true, and false or * absent for false. 
* @@ -212,8 +226,8 @@ class MediaTransformError extends MediaTransformOutput { $htmlArgs = array_map( 'htmlspecialchars', $args ); $htmlArgs = array_map( 'nl2br', $htmlArgs ); - $this->htmlMsg = wfMsgReplaceArgs( htmlspecialchars( wfMsgGetKey( $msg, true ) ), $htmlArgs ); - $this->textMsg = wfMsgReal( $msg, $args ); + $this->htmlMsg = wfMessage( $msg )->rawParams( $htmlArgs )->escaped(); + $this->textMsg = wfMessage( $msg )->rawParams( $htmlArgs )->text(); $this->width = intval( $width ); $this->height = intval( $height ); $this->url = false; diff --git a/includes/media/PNG.php b/includes/media/PNG.php index 5197282c..8fe9ecb4 100644 --- a/includes/media/PNG.php +++ b/includes/media/PNG.php @@ -12,26 +12,51 @@ * @ingroup Media */ class PNGHandler extends BitmapHandler { - + + const BROKEN_FILE = '0'; + + /** + * @param File $image + * @param string $filename + * @return string + */ function getMetadata( $image, $filename ) { - if ( !isset($image->parsedPNGMetadata) ) { - try { - $image->parsedPNGMetadata = PNGMetadataExtractor::getMetadata( $filename ); - } catch( Exception $e ) { - // Broken file? - wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" ); - return '0'; - } + try { + $metadata = BitmapMetadataHandler::PNG( $filename ); + } catch( Exception $e ) { + // Broken file? + wfDebug( __METHOD__ . ': ' . $e->getMessage() . 
"\n" ); + return self::BROKEN_FILE; } - return serialize($image->parsedPNGMetadata); - + return serialize($metadata); } - + + /** + * @param $image File + * @return array|bool + */ function formatMetadata( $image ) { - return false; + $meta = $image->getMetadata(); + + if ( !$meta ) { + return false; + } + $meta = unserialize( $meta ); + if ( !isset( $meta['metadata'] ) || count( $meta['metadata'] ) <= 1 ) { + return false; + } + + if ( isset( $meta['metadata']['_MW_PNG_VERSION'] ) ) { + unset( $meta['metadata']['_MW_PNG_VERSION'] ); + } + return $this->formatMetadataHelper( $meta['metadata'] ); } - + + /** + * @param $image File + * @return bool + */ function isAnimatedImage( $image ) { $ser = $image->getMetadata(); if ($ser) { @@ -46,11 +71,33 @@ class PNGHandler extends BitmapHandler { } function isMetadataValid( $image, $metadata ) { + + if ( $metadata === self::BROKEN_FILE ) { + // Do not repetitivly regenerate metadata on broken file. + return self::METADATA_GOOD; + } + wfSuppressWarnings(); $data = unserialize( $metadata ); wfRestoreWarnings(); - return (boolean) $data; + + if ( !$data || !is_array( $data ) ) { + wfDebug(__METHOD__ . ' invalid png metadata' ); + return self::METADATA_BAD; + } + + if ( !isset( $data['metadata']['_MW_PNG_VERSION'] ) + || $data['metadata']['_MW_PNG_VERSION'] != PNGMetadataExtractor::VERSION ) { + wfDebug(__METHOD__ . 
' old but compatible png metadata' ); + return self::METADATA_COMPATIBLE; + } + return self::METADATA_GOOD; } + + /** + * @param $image File + * @return string + */ function getLongDesc( $image ) { global $wgLang; $original = parent::getLongDesc( $image ); @@ -65,16 +112,19 @@ class PNGHandler extends BitmapHandler { $info = array(); $info[] = $original; - if ($metadata['loopCount'] == 0) + if ( $metadata['loopCount'] == 0 ) { $info[] = wfMsgExt( 'file-info-png-looped', 'parseinline' ); - elseif ($metadata['loopCount'] > 1) + } elseif ( $metadata['loopCount'] > 1 ) { $info[] = wfMsgExt( 'file-info-png-repeat', 'parseinline', $metadata['loopCount'] ); + } - if ($metadata['frameCount'] > 0) + if ( $metadata['frameCount'] > 0 ) { $info[] = wfMsgExt( 'file-info-png-frames', 'parseinline', $metadata['frameCount'] ); + } - if ($metadata['duration']) + if ( $metadata['duration'] ) { $info[] = $wgLang->formatTimePeriod( $metadata['duration'] ); + } return $wgLang->commaList( $info ); } diff --git a/includes/media/PNGMetadataExtractor.php b/includes/media/PNGMetadataExtractor.php index 6a931e6c..d3c44d4f 100644 --- a/includes/media/PNGMetadataExtractor.php +++ b/includes/media/PNGMetadataExtractor.php @@ -1,6 +1,6 @@ <?php /** - * PNG frame counter. + * PNG frame counter and metadata extractor. * Slightly derived from GIFMetadataExtractor.php * Deliberately not using MWExceptions to avoid external dependencies, encouraging * redistribution. @@ -17,26 +17,61 @@ class PNGMetadataExtractor { static $png_sig; static $CRC_size; + static $text_chunks; + + const VERSION = 1; + const MAX_CHUNK_SIZE = 3145728; // 3 megabytes static function getMetadata( $filename ) { self::$png_sig = pack( "C8", 137, 80, 78, 71, 13, 10, 26, 10 ); self::$CRC_size = 4; - + /* based on list at http://owl.phy.queensu.ca/~phil/exiftool/TagNames/PNG.html#TextualData + * and http://www.w3.org/TR/PNG/#11keywords + */ + self::$text_chunks = array( + 'xml:com.adobe.xmp' => 'xmp', + # Artist is unofficial. 
Author is the recommended + # keyword in the PNG spec. However some people output + # Artist so support both. + 'artist' => 'Artist', + 'model' => 'Model', + 'make' => 'Make', + 'author' => 'Artist', + 'comment' => 'PNGFileComment', + 'description' => 'ImageDescription', + 'title' => 'ObjectName', + 'copyright' => 'Copyright', + # Source as in original device used to make image + # not as in who gave you the image + 'source' => 'Model', + 'software' => 'Software', + 'disclaimer' => 'Disclaimer', + 'warning' => 'ContentWarning', + 'url' => 'Identifier', # Not sure if this is best mapping. Maybe WebStatement. + 'label' => 'Label', + 'creation time' => 'DateTimeDigitized', + /* Other potentially useful things - Document */ + ); + $frameCount = 0; $loopCount = 1; + $text = array(); $duration = 0.0; + $bitDepth = 0; + $colorType = 'unknown'; - if (!$filename) + if ( !$filename ) { throw new Exception( __METHOD__ . ": No file name specified" ); - elseif ( !file_exists($filename) || is_dir($filename) ) + } elseif ( !file_exists( $filename ) || is_dir( $filename ) ) { throw new Exception( __METHOD__ . ": File $filename does not exist" ); - - $fh = fopen( $filename, 'r' ); - - if (!$fh) { + } + + $fh = fopen( $filename, 'rb' ); + + if ( !$fh ) { throw new Exception( __METHOD__ . ": Unable to open file $filename" ); } - + // Check for the PNG header $buf = fread( $fh, 8 ); if ( $buf != self::$png_sig ) { @@ -44,22 +79,54 @@ class PNGMetadataExtractor { } // Read chunks - while( !feof( $fh ) ) { + while ( !feof( $fh ) ) { $buf = fread( $fh, 4 ); - if( !$buf ) { + if ( !$buf || strlen( $buf ) < 4 ) { throw new Exception( __METHOD__ . ": Read error" ); } - $chunk_size = unpack( "N", $buf); + $chunk_size = unpack( "N", $buf ); $chunk_size = $chunk_size[1]; + if ( $chunk_size < 0 ) { + throw new Exception( __METHOD__ . 
": Chunk size too big for unpack" ); + } + $chunk_type = fread( $fh, 4 ); - if( !$chunk_type ) { + if ( !$chunk_type || strlen( $chunk_type ) < 4 ) { throw new Exception( __METHOD__ . ": Read error" ); } - if ( $chunk_type == "acTL" ) { + if ( $chunk_type == "IHDR" ) { + $buf = self::read( $fh, $chunk_size ); + if ( !$buf || strlen( $buf ) < $chunk_size ) { + throw new Exception( __METHOD__ . ": Read error" ); + } + $bitDepth = ord( substr( $buf, 8, 1 ) ); + // Detect the color type in British English as per the spec + // http://www.w3.org/TR/PNG/#11IHDR + switch ( ord( substr( $buf, 9, 1 ) ) ) { + case 0: + $colorType = 'greyscale'; + break; + case 2: + $colorType = 'truecolour'; + break; + case 3: + $colorType = 'index-coloured'; + break; + case 4: + $colorType = 'greyscale-alpha'; + break; + case 6: + $colorType = 'truecolour-alpha'; + break; + default: + $colorType = 'unknown'; + break; + } + } elseif ( $chunk_type == "acTL" ) { $buf = fread( $fh, $chunk_size ); - if( !$buf ) { + if( !$buf || strlen( $buf ) < $chunk_size || $chunk_size < 4 ) { throw new Exception( __METHOD__ . ": Read error" ); } @@ -67,20 +134,216 @@ class PNGMetadataExtractor { $frameCount = $actl['frames']; $loopCount = $actl['plays']; } elseif ( $chunk_type == "fcTL" ) { - $buf = fread( $fh, $chunk_size ); - if( !$buf ) { + $buf = self::read( $fh, $chunk_size ); + if ( !$buf || strlen( $buf ) < $chunk_size ) { + throw new Exception( __METHOD__ . ": Read error" ); + } + $buf = substr( $buf, 20 ); + if ( strlen( $buf ) < 4 ) { throw new Exception( __METHOD__ . 
": Read error" ); } - $buf = substr( $buf, 20 ); $fctldur = unpack( "ndelay_num/ndelay_den", $buf ); - if( $fctldur['delay_den'] == 0 ) $fctldur['delay_den'] = 100; - if( $fctldur['delay_num'] ) { + if ( $fctldur['delay_den'] == 0 ) { + $fctldur['delay_den'] = 100; + } + if ( $fctldur['delay_num'] ) { $duration += $fctldur['delay_num'] / $fctldur['delay_den']; } - } elseif ( ( $chunk_type == "IDAT" || $chunk_type == "IEND" ) && $frameCount == 0 ) { - // Not a valid animated image. No point in continuing. - break; + } elseif ( $chunk_type == "iTXt" ) { + // Extracts iTXt chunks, uncompressing if necessary. + $buf = self::read( $fh, $chunk_size ); + $items = array(); + if ( preg_match( + '/^([^\x00]{1,79})\x00(\x00|\x01)\x00([^\x00]*)(.)[^\x00]*\x00(.*)$/Ds', + $buf, $items ) + ) { + /* $items[1] = text chunk name, $items[2] = compressed flag, + * $items[3] = lang code (or ""), $items[4]= compression type. + * $items[5] = content + */ + + // Theoretically should be case-sensitive, but in practise... + $items[1] = strtolower( $items[1] ); + if ( !isset( self::$text_chunks[$items[1]] ) ) { + // Only extract textual chunks on our list. + fseek( $fh, self::$CRC_size, SEEK_CUR ); + continue; + } + + $items[3] = strtolower( $items[3] ); + if ( $items[3] == '' ) { + // if no lang specified use x-default like in xmp. + $items[3] = 'x-default'; + } + + // if compressed + if ( $items[2] == "\x01" ) { + if ( function_exists( 'gzuncompress' ) && $items[4] === "\x00" ) { + wfSuppressWarnings(); + $items[5] = gzuncompress( $items[5] ); + wfRestoreWarnings(); + + if ( $items[5] === false ) { + // decompression failed + wfDebug( __METHOD__ . ' Error decompressing iTxt chunk - ' . $items[1] ); + fseek( $fh, self::$CRC_size, SEEK_CUR ); + continue; + } + + } else { + wfDebug( __METHOD__ . ' Skipping compressed png iTXt chunk due to lack of zlib,' + . 
' or potentially invalid compression method' ); + fseek( $fh, self::$CRC_size, SEEK_CUR ); + continue; + } + } + $finalKeyword = self::$text_chunks[ $items[1] ]; + $text[ $finalKeyword ][ $items[3] ] = $items[5]; + $text[ $finalKeyword ]['_type'] = 'lang'; + + } else { + // Error reading iTXt chunk + throw new Exception( __METHOD__ . ": Read error on iTXt chunk" ); + } + + } elseif ( $chunk_type == 'tEXt' ) { + $buf = self::read( $fh, $chunk_size ); + + // In case there is no \x00 which will make explode fail. + if ( strpos( $buf, "\x00" ) === false ) { + throw new Exception( __METHOD__ . ": Read error on tEXt chunk" ); + } + + list( $keyword, $content ) = explode( "\x00", $buf, 2 ); + if ( $keyword === '' || $content === '' ) { + throw new Exception( __METHOD__ . ": Read error on tEXt chunk" ); + } + + // Theoretically should be case-sensitive, but in practise... + $keyword = strtolower( $keyword ); + if ( !isset( self::$text_chunks[ $keyword ] ) ) { + // Don't recognize chunk, so skip. + fseek( $fh, self::$CRC_size, SEEK_CUR ); + continue; + } + wfSuppressWarnings(); + $content = iconv( 'ISO-8859-1', 'UTF-8', $content ); + wfRestoreWarnings(); + + if ( $content === false ) { + throw new Exception( __METHOD__ . ": Read error (error with iconv)" ); + } + + $finalKeyword = self::$text_chunks[ $keyword ]; + $text[ $finalKeyword ][ 'x-default' ] = $content; + $text[ $finalKeyword ]['_type'] = 'lang'; + + } elseif ( $chunk_type == 'zTXt' ) { + if ( function_exists( 'gzuncompress' ) ) { + $buf = self::read( $fh, $chunk_size ); + + // In case there is no \x00 which will make explode fail. + if ( strpos( $buf, "\x00" ) === false ) { + throw new Exception( __METHOD__ . ": Read error on zTXt chunk" ); + } + + list( $keyword, $postKeyword ) = explode( "\x00", $buf, 2 ); + if ( $keyword === '' || $postKeyword === '' ) { + throw new Exception( __METHOD__ . ": Read error on zTXt chunk" ); + } + // Theoretically should be case-sensitive, but in practise... 
+ $keyword = strtolower( $keyword ); + + if ( !isset( self::$text_chunks[ $keyword ] ) ) { + // Don't recognize chunk, so skip. + fseek( $fh, self::$CRC_size, SEEK_CUR ); + continue; + } + $compression = substr( $postKeyword, 0, 1 ); + $content = substr( $postKeyword, 1 ); + if ( $compression !== "\x00" ) { + wfDebug( __METHOD__ . " Unrecognized compression method in zTXt ($keyword). Skipping." ); + fseek( $fh, self::$CRC_size, SEEK_CUR ); + continue; + } + + wfSuppressWarnings(); + $content = gzuncompress( $content ); + wfRestoreWarnings(); + + if ( $content === false ) { + // decompression failed + wfDebug( __METHOD__ . ' Error decompressing zTXt chunk - ' . $keyword ); + fseek( $fh, self::$CRC_size, SEEK_CUR ); + continue; + } + + wfSuppressWarnings(); + $content = iconv( 'ISO-8859-1', 'UTF-8', $content ); + wfRestoreWarnings(); + + if ( $content === false ) { + throw new Exception( __METHOD__ . ": Read error (error with iconv)" ); + } + + $finalKeyword = self::$text_chunks[ $keyword ]; + $text[ $finalKeyword ][ 'x-default' ] = $content; + $text[ $finalKeyword ]['_type'] = 'lang'; + + } else { + wfDebug( __METHOD__ . " Cannot decompress zTXt chunk due to lack of zlib. Skipping." ); + fseek( $fh, $chunk_size, SEEK_CUR ); + } + } elseif ( $chunk_type == 'tIME' ) { + // last mod timestamp. + if ( $chunk_size !== 7 ) { + throw new Exception( __METHOD__ . ": tIME wrong size" ); + } + $buf = self::read( $fh, $chunk_size ); + if ( !$buf || strlen( $buf ) < $chunk_size ) { + throw new Exception( __METHOD__ . ": Read error" ); + } + + // Note: spec says this should be UTC. + $t = unpack( "ny/Cm/Cd/Ch/Cmin/Cs", $buf ); + $strTime = sprintf( "%04d%02d%02d%02d%02d%02d", + $t['y'], $t['m'], $t['d'], $t['h'], + $t['min'], $t['s'] ); + + $exifTime = wfTimestamp( TS_EXIF, $strTime ); + + if ( $exifTime ) { + $text['DateTime'] = $exifTime; + } + + } elseif ( $chunk_type == 'pHYs' ) { + // how big pixels are (dots per meter). 
+ if ( $chunk_size !== 9 ) { + throw new Exception( __METHOD__ . ": pHYs wrong size" ); + } + + $buf = self::read( $fh, $chunk_size ); + if ( !$buf || strlen( $buf ) < $chunk_size ) { + throw new Exception( __METHOD__ . ": Read error" ); + } + + $dim = unpack( "Nwidth/Nheight/Cunit", $buf ); + if ( $dim['unit'] == 1 ) { + // Need to check for negative because php + // doesn't deal with super-large unsigned 32-bit ints well + if ( $dim['width'] > 0 && $dim['height'] > 0 ) { + // unit is meters + // (as opposed to 0 = undefined ) + $text['XResolution'] = $dim['width'] + . '/100'; + $text['YResolution'] = $dim['height'] + . '/100'; + $text['ResolutionUnit'] = 3; + // 3 = dots per cm (from Exif). + } + } + } elseif ( $chunk_type == "IEND" ) { break; } else { @@ -90,15 +353,59 @@ class PNGMetadataExtractor { } fclose( $fh ); - if( $loopCount > 1 ) { + if ( $loopCount > 1 ) { $duration *= $loopCount; } + if ( isset( $text['DateTimeDigitized'] ) ) { + // Convert date format from rfc2822 to exif. + foreach ( $text['DateTimeDigitized'] as $name => &$value ) { + if ( $name === '_type' ) { + continue; + } + + // @todo FIXME: Currently timezones are ignored. + // possibly should be wfTimestamp's + // responsibility. (at least for numeric TZ) + $formatted = wfTimestamp( TS_EXIF, $value ); + if ( $formatted ) { + // Only change if we could convert the + // date. + // The png standard says it should be + // in rfc2822 format, but not required. + // In general for the exif stuff we + // prettify the date if we can, but we + // display as-is if we cannot or if + // it is invalid. + // So do the same here. + + $value = $formatted; + } + } + } return array( 'frameCount' => $frameCount, 'loopCount' => $loopCount, - 'duration' => $duration + 'duration' => $duration, + 'text' => $text, + 'bitDepth' => $bitDepth, + 'colorType' => $colorType, ); - + + } + /** + * Read a chunk, checking to make sure its not too big. 
+ * + * @param $fh resource The file handle + * @param $size Integer size in bytes. + * @throws Exception if too big. + * @return String The chunk. + */ + static private function read( $fh, $size ) { + if ( $size > self::MAX_CHUNK_SIZE ) { + throw new Exception( __METHOD__ . ': Chunk size of ' . $size . + ' too big. Max size is: ' . self::MAX_CHUNK_SIZE ); + } + return fread( $fh, $size ); } } diff --git a/includes/media/SVG.php b/includes/media/SVG.php index a78be952..ceffd7c3 100644 --- a/includes/media/SVG.php +++ b/includes/media/SVG.php @@ -32,6 +32,10 @@ class SvgHandler extends ImageHandler { return true; } + /** + * @param $file File + * @return bool + */ function isAnimatedImage( $file ) { # TODO: detect animated SVGs $metadata = $file->getMetadata(); @@ -44,14 +48,17 @@ class SvgHandler extends ImageHandler { return false; } + /** + * @param $image File + * @param $params + * @return bool + */ function normaliseParams( $image, &$params ) { global $wgSVGMaxSize; if ( !parent::normaliseParams( $image, $params ) ) { return false; } # Don't make an image bigger than wgMaxSVGSize on the smaller side - $params['physicalWidth'] = $params['width']; - $params['physicalHeight'] = $params['height']; if ( $params['physicalWidth'] <= $params['physicalHeight'] ) { if ( $params['physicalWidth'] > $wgSVGMaxSize ) { $srcWidth = $image->getWidth( $params['page'] ); @@ -70,6 +77,14 @@ class SvgHandler extends ImageHandler { return true; } + /** + * @param $image File + * @param $dstPath + * @param $dstUrl + * @param $params + * @param int $flags + * @return bool|MediaTransformError|ThumbnailImage|TransformParameterError + */ function doTransform( $image, $dstPath, $dstUrl, $params, $flags = 0 ) { if ( !$this->normaliseParams( $image, $params ) ) { return new TransformParameterError( $params ); @@ -97,7 +112,7 @@ class SvgHandler extends ImageHandler { } } - /* + /** * Transform an SVG file to PNG * This function can be called outside of thumbnail contexts * @param string 
$srcPath @@ -111,19 +126,32 @@ class SvgHandler extends ImageHandler { $err = false; $retval = ''; if ( isset( $wgSVGConverters[$wgSVGConverter] ) ) { - $cmd = str_replace( - array( '$path/', '$width', '$height', '$input', '$output' ), - array( $wgSVGConverterPath ? wfEscapeShellArg( "$wgSVGConverterPath/" ) : "", - intval( $width ), - intval( $height ), - wfEscapeShellArg( $srcPath ), - wfEscapeShellArg( $dstPath ) ), - $wgSVGConverters[$wgSVGConverter] - ) . " 2>&1"; - wfProfileIn( 'rsvg' ); - wfDebug( __METHOD__.": $cmd\n" ); - $err = wfShellExec( $cmd, $retval ); - wfProfileOut( 'rsvg' ); + if ( is_array( $wgSVGConverters[$wgSVGConverter] ) ) { + // This is a PHP callable + $func = $wgSVGConverters[$wgSVGConverter][0]; + $args = array_merge( array( $srcPath, $dstPath, $width, $height ), + array_slice( $wgSVGConverters[$wgSVGConverter], 1 ) ); + if ( !is_callable( $func ) ) { + throw new MWException( "$func is not callable" ); + } + $err = call_user_func_array( $func, $args ); + $retval = (bool)$err; + } else { + // External command + $cmd = str_replace( + array( '$path/', '$width', '$height', '$input', '$output' ), + array( $wgSVGConverterPath ? wfEscapeShellArg( "$wgSVGConverterPath/" ) : "", + intval( $width ), + intval( $height ), + wfEscapeShellArg( $srcPath ), + wfEscapeShellArg( $dstPath ) ), + $wgSVGConverters[$wgSVGConverter] + ) . 
" 2>&1"; + wfProfileIn( 'rsvg' ); + wfDebug( __METHOD__.": $cmd\n" ); + $err = wfShellExec( $cmd, $retval ); + wfProfileOut( 'rsvg' ); + } } $removed = $this->removeBadFile( $dstPath, $retval ); if ( $retval != 0 || $removed ) { @@ -133,7 +161,27 @@ class SvgHandler extends ImageHandler { } return true; } + + public static function rasterizeImagickExt( $srcPath, $dstPath, $width, $height ) { + $im = new Imagick( $srcPath ); + $im->setImageFormat( 'png' ); + $im->setBackgroundColor( 'transparent' ); + $im->setImageDepth( 8 ); + + if ( !$im->thumbnailImage( intval( $width ), intval( $height ), /* fit */ false ) ) { + return 'Could not resize image'; + } + if ( !$im->writeImage( $dstPath ) ) { + return "Could not write to $dstPath"; + } + } + /** + * @param $file File + * @param $path + * @param bool $metadata + * @return array + */ function getImageSize( $file, $path, $metadata = false ) { if ( $metadata === false ) { $metadata = $file->getMetaData(); @@ -150,6 +198,10 @@ class SvgHandler extends ImageHandler { return array( 'png', 'image/png' ); } + /** + * @param $file File + * @return string + */ function getLongDesc( $file ) { global $wgLang; return wfMsgExt( 'svg-long-desc', 'parseinline', @@ -171,7 +223,9 @@ class SvgHandler extends ImageHandler { } function unpackMetadata( $metadata ) { - $unser = @unserialize( $metadata ); + wfSuppressWarnings(); + $unser = unserialize( $metadata ); + wfRestoreWarnings(); if ( isset( $unser['version'] ) && $unser['version'] == self::SVG_METADATA_VERSION ) { return $unser; } else { @@ -192,6 +246,10 @@ class SvgHandler extends ImageHandler { return $fields; } + /** + * @param $file File + * @return array|bool + */ function formatMetadata( $file ) { $result = array( 'visible' => array(), diff --git a/includes/media/SVGMetadataExtractor.php b/includes/media/SVGMetadataExtractor.php index 66ae1edf..22ef8e61 100644 --- a/includes/media/SVGMetadataExtractor.php +++ b/includes/media/SVGMetadataExtractor.php @@ -55,7 +55,7 @@ class 
SVGReader { $size = filesize( $source ); if ( $size === false ) { throw new MWException( "Error getting filesize of SVG." ); - } + } if ( $size > $wgSVGMetadataCutoff ) { $this->debug( "SVG is $size bytes, which is bigger than $wgSVGMetadataCutoff. Truncating." ); @@ -84,14 +84,14 @@ class SVGReader { wfRestoreWarnings(); } - /* + /** * @return Array with the known metadata */ public function getMetadata() { return $this->metadata; } - /* + /** * Read the SVG */ public function read() { @@ -139,10 +139,12 @@ class SVGReader { $keepReading = $this->reader->next(); } + $this->reader->close(); + return true; } - /* + /** * Read a textelement from an element * * @param String $name of the element that we are reading from @@ -155,7 +157,7 @@ class SVGReader { } $keepReading = $this->reader->read(); while( $keepReading ) { - if( $this->reader->localName == $name && $this->namespaceURI == self::NS_SVG && $this->reader->nodeType == XmlReader::END_ELEMENT ) { + if( $this->reader->localName == $name && $this->reader->namespaceURI == self::NS_SVG && $this->reader->nodeType == XmlReader::END_ELEMENT ) { break; } elseif( $this->reader->nodeType == XmlReader::TEXT ){ $this->metadata[$metafield] = trim( $this->reader->value ); @@ -175,20 +177,27 @@ class SVGReader { return; } // TODO: find and store type of xml snippet. metadata['metadataType'] = "rdf" - $this->metadata[$metafield] = trim( $this->reader->readInnerXML() ); + if( method_exists( $this->reader, 'readInnerXML' ) ) { + $this->metadata[$metafield] = trim( $this->reader->readInnerXML() ); + } else { + throw new MWException( "The PHP XMLReader extension does not come with readInnerXML() method. Your libxml is probably out of date (need 2.6.20 or later)." 
); + } $this->reader->next(); } - /* + /** * Filter all children, looking for animate elements * * @param String $name of the element that we are reading from */ private function animateFilter( $name ) { - $this->debug ( "animate filter" ); + $this->debug ( "animate filter for tag $name" ); if( $this->reader->nodeType != XmlReader::ELEMENT ) { return; } + if ( $this->reader->isEmptyElement ) { + return; + } $exitDepth = $this->reader->depth; $keepReading = $this->reader->read(); while( $keepReading ) { @@ -230,7 +239,7 @@ class SVGReader { wfDebug( "SVGReader WARN: $data\n" ); } - /* + /** * Parse the attributes of an SVG element * * The parser has to be in the start element of <svg> diff --git a/includes/media/Tiff.php b/includes/media/Tiff.php index 8773201f..0f317e1a 100644 --- a/includes/media/Tiff.php +++ b/includes/media/Tiff.php @@ -11,27 +11,74 @@ * * @ingroup Media */ -class TiffHandler extends BitmapHandler { +class TiffHandler extends ExifBitmapHandler { /** * Conversion to PNG for inline display can be disabled here... * Note scaling should work with ImageMagick, but may not with GD scaling. + * + * Files pulled from an another MediaWiki instance via ForeignAPIRepo / + * InstantCommons will have thumbnails managed from the remote instance, + * so we can skip this check. + * + * @param $file + * + * @return bool */ function canRender( $file ) { global $wgTiffThumbnailType; - return (bool)$wgTiffThumbnailType; + return (bool)$wgTiffThumbnailType + || ($file->getRepo() instanceof ForeignAPIRepo); } /** * Browsers don't support TIFF inline generally... * For inline display, we need to convert to PNG. 
+ * + * @param $file + * + * @return bool */ function mustRender( $file ) { return true; } + /** + * @param $ext + * @param $mime + * @param $params + * @return bool + */ function getThumbType( $ext, $mime, $params = null ) { global $wgTiffThumbnailType; return $wgTiffThumbnailType; } + + /** + * @param $image + * @param $filename + * @return string + */ + function getMetadata( $image, $filename ) { + global $wgShowEXIF; + if ( $wgShowEXIF ) { + try { + $meta = BitmapMetadataHandler::Tiff( $filename ); + if ( !is_array( $meta ) ) { + // This should never happen, but doesn't hurt to be paranoid. + throw new MWException('Metadata array is not an array'); + } + $meta['MEDIAWIKI_EXIF_VERSION'] = Exif::version(); + return serialize( $meta ); + } + catch ( MWException $e ) { + // BitmapMetadataHandler throws an exception in certain exceptional + // cases like if file does not exist. + wfDebug( __METHOD__ . ': ' . $e->getMessage() . "\n" ); + return ExifBitmapHandler::BROKEN_FILE; + } + } else { + return ''; + } + } } diff --git a/includes/media/XMP.php b/includes/media/XMP.php new file mode 100644 index 00000000..1e578582 --- /dev/null +++ b/includes/media/XMP.php @@ -0,0 +1,1174 @@ +<?php +/** +* Class for reading xmp data containing properties relevant to +* images, and spitting out an array that FormatExif accepts. +* +* Note, this is not meant to recognize every possible thing you can +* encode in XMP. It should recognize all the properties we want. +* For example it doesn't have support for structures with multiple +* nesting levels, as none of the properties we're supporting use that +* feature. If it comes across properties it doesn't recognize, it should +* ignore them. +* +* The public methods one would call in this class are +* - parse( $content ) +* Reads in xmp content. +* Can potentially be called multiple times with partial data each time. +* - parseExtended( $content ) +* Reads XMPExtended blocks (jpeg files only). 
+* - getResults +* Outputs a results array. +* +* Note XMP kind of looks like rdf. They are not the same thing - XMP is +* encoded as a specific subset of rdf. This class can read XMP. It cannot +* read rdf. +* +*/ +class XMPReader { + + private $curItem = array(); // array to hold the current element (and previous element, and so on) + private $ancestorStruct = false; // the structure name when processing nested structures. + private $charContent = false; // temporary holder for character data that appears in xmp doc. + private $mode = array(); // stores the state the xmpreader is in (see MODE_FOO constants) + private $results = array(); // array to hold results + private $processingArray = false; // if we're doing a seq or bag. + private $itemLang = false; // used for lang alts only + + private $xmlParser; + private $charset = false; + private $extendedXMPOffset = 0; + + protected $items; + + /** + * These are various mode constants. + * they are used to figure out what to do + * with an element when its encountered. + * + * For example, MODE_IGNORE is used when processing + * a property we're not interested in. So if a new + * element pops up when we're in that mode, we ignore it. + */ + const MODE_INITIAL = 0; + const MODE_IGNORE = 1; + const MODE_LI = 2; + const MODE_LI_LANG = 3; + const MODE_QDESC = 4; + + // The following MODE constants are also used in the + // $items array to denote what type of property the item is. + const MODE_SIMPLE = 10; + const MODE_STRUCT = 11; // structure (associative array) + const MODE_SEQ = 12; // ordered list + const MODE_BAG = 13; // unordered list + const MODE_LANG = 14; + const MODE_ALT = 15; // non-language alt. Currently not implemented, and not needed atm. + const MODE_BAGSTRUCT = 16; // A BAG of Structs. + + const NS_RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; + const NS_XML = 'http://www.w3.org/XML/1998/namespace'; + + + /** + * Constructor. 
+ * + * Primary job is to initialize the XMLParser + */ + function __construct() { + + if ( !function_exists( 'xml_parser_create_ns' ) ) { + // this should already be checked by this point + throw new MWException( 'XMP support requires XML Parser' ); + } + + $this->items = XMPInfo::getItems(); + + $this->resetXMLParser(); + + } + /** + * Main use is if a single item has multiple xmp documents describing it. + * For example in jpeg's with extendedXMP + */ + private function resetXMLParser() { + + if ($this->xmlParser) { + //is this needed? + xml_parser_free( $this->xmlParser ); + } + + $this->xmlParser = xml_parser_create_ns( 'UTF-8', ' ' ); + xml_parser_set_option( $this->xmlParser, XML_OPTION_CASE_FOLDING, 0 ); + xml_parser_set_option( $this->xmlParser, XML_OPTION_SKIP_WHITE, 1 ); + + xml_set_element_handler( $this->xmlParser, + array( $this, 'startElement' ), + array( $this, 'endElement' ) ); + + xml_set_character_data_handler( $this->xmlParser, array( $this, 'char' ) ); + } + + /** Destroy the xml parser + * + * Not sure if this is actually needed. + */ + function __destruct() { + // not sure if this is needed. + xml_parser_free( $this->xmlParser ); + } + + /** Get the result array. Do some post-processing before returning + * the array, and transform any metadata that is special-cased. + * + * @return Array array of results as an array of arrays suitable for + * FormatMetadata::getFormattedData(). + */ + public function getResults() { + // xmp-special is for metadata that affects how stuff + // is extracted. For example xmpNote:HasExtendedXMP. + + // It is also used to handle photoshop:AuthorsPosition + // which is weird and really part of another property, + // see 2:85 in IPTC. See also pg 21 of IPTC4XMP standard. + // The location fields also use it. 
+ + $data = $this->results; + + wfRunHooks('XMPGetResults', Array(&$data)); + + if ( isset( $data['xmp-special']['AuthorsPosition'] ) + && is_string( $data['xmp-special']['AuthorsPosition'] ) + && isset( $data['xmp-general']['Artist'][0] ) + ) { + // Note, if there is more than one creator, + // this only applies to first. This also will + // only apply to the dc:Creator prop, not the + // exif:Artist prop. + + $data['xmp-general']['Artist'][0] = + $data['xmp-special']['AuthorsPosition'] . ', ' + . $data['xmp-general']['Artist'][0]; + } + + // Go through the LocationShown and LocationCreated + // changing it to the non-hierarchal form used by + // the other location fields. + + if ( isset( $data['xmp-special']['LocationShown'][0] ) + && is_array( $data['xmp-special']['LocationShown'][0] ) + ) { + // the is_array is just paranoia. It should always + // be an array. + foreach( $data['xmp-special']['LocationShown'] as $loc ) { + if ( !is_array( $loc ) ) { + // To avoid copying over the _type meta-fields. + continue; + } + foreach( $loc as $field => $val ) { + $data['xmp-general'][$field . 'Dest'][] = $val; + } + } + } + if ( isset( $data['xmp-special']['LocationCreated'][0] ) + && is_array( $data['xmp-special']['LocationCreated'][0] ) + ) { + // the is_array is just paranoia. It should always + // be an array. + foreach( $data['xmp-special']['LocationCreated'] as $loc ) { + if ( !is_array( $loc ) ) { + // To avoid copying over the _type meta-fields. + continue; + } + foreach( $loc as $field => $val ) { + $data['xmp-general'][$field . 'Created'][] = $val; + } + } + } + + + // We don't want to return the special values, since they're + // special and not info to be stored about the file. + unset( $data['xmp-special'] ); + + // Convert GPSAltitude to negative if below sea level. 
+ if ( isset( $data['xmp-exif']['GPSAltitudeRef'] ) ) { + if ( $data['xmp-exif']['GPSAltitudeRef'] == '1' + && isset( $data['xmp-exif']['GPSAltitude'] ) + ) { + $data['xmp-exif']['GPSAltitude'] *= -1; + } + unset( $data['xmp-exif']['GPSAltitudeRef'] ); + } + + return $data; + } + + /** + * Main function to call to parse XMP. Use getResults to + * get results. + * + * Also catches any errors during processing, writes them to + * debug log, blanks result array and returns false. + * + * @param String: $content XMP data + * @param Boolean: $allOfIt If this is all the data (true) or if its split up (false). Default true + * @param Boolean: $reset - does xml parser need to be reset. Default false + * @return Boolean success. + */ + public function parse( $content, $allOfIt = true, $reset = false ) { + if ( $reset ) { + $this->resetXMLParser(); + } + try { + + // detect encoding by looking for BOM which is supposed to be in processing instruction. + // see page 12 of http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart3.pdf + if ( !$this->charset ) { + $bom = array(); + if ( preg_match( '/\xEF\xBB\xBF|\xFE\xFF|\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\xFF\xFE/', + $content, $bom ) + ) { + switch ( $bom[0] ) { + case "\xFE\xFF": + $this->charset = 'UTF-16BE'; + break; + case "\xFF\xFE": + $this->charset = 'UTF-16LE'; + break; + case "\x00\x00\xFE\xFF": + $this->charset = 'UTF-32BE'; + break; + case "\xFF\xFE\x00\x00": + $this->charset = 'UTF-32LE'; + break; + case "\xEF\xBB\xBF": + $this->charset = 'UTF-8'; + break; + default: + //this should be impossible to get to + throw new MWException("Invalid BOM"); + break; + + } + + } else { + // standard specifically says, if no bom assume utf-8 + $this->charset = 'UTF-8'; + } + } + if ( $this->charset !== 'UTF-8' ) { + //don't convert if already utf-8 + wfSuppressWarnings(); + $content = iconv( $this->charset, 'UTF-8//IGNORE', $content ); + wfRestoreWarnings(); + } + + $ok = xml_parse( $this->xmlParser, $content, $allOfIt ); + if 
( !$ok ) { + $error = xml_error_string( xml_get_error_code( $this->xmlParser ) ); + $where = 'line: ' . xml_get_current_line_number( $this->xmlParser ) + . ' column: ' . xml_get_current_column_number( $this->xmlParser ) + . ' byte offset: ' . xml_get_current_byte_index( $this->xmlParser ); + + wfDebugLog( 'XMP', "XMPReader::parse : Error reading XMP content: $error ($where)" ); + $this->results = array(); // blank if error. + return false; + } + } catch ( MWException $e ) { + wfDebugLog( 'XMP', 'XMP parse error: ' . $e ); + $this->results = array(); + return false; + } + return true; + } + + /** Entry point for XMPExtended blocks in jpeg files + * + * @todo In serious need of testing + * @see http://www.adobe.ge/devnet/xmp/pdfs/XMPSpecificationPart3.pdf XMP spec part 3 page 20 + * @param String $content XMPExtended block minus the namespace signature + * @return Boolean If it succeeded. + */ + public function parseExtended( $content ) { + // @todo FIXME: This is untested. Hard to find example files + // or programs that make such files.. + $guid = substr( $content, 0, 32 ); + if ( !isset( $this->results['xmp-special']['HasExtendedXMP'] ) + || $this->results['xmp-special']['HasExtendedXMP'] !== $guid ) { + wfDebugLog('XMP', __METHOD__ . " Ignoring XMPExtended block due to wrong guid (guid= '$guid' )"); + return false; + } + $len = unpack( 'Nlength/Noffset', substr( $content, 32, 8 ) ); + + if (!$len || $len['length'] < 4 || $len['offset'] < 0 || $len['offset'] > $len['length'] ) { + wfDebugLog('XMP', __METHOD__ . 'Error reading extended XMP block, invalid length or offset.'); + return false; + } + + + // we're not very robust here. we should accept it in the wrong order. To quote + // the xmp standard: + // "A JPEG writer should write the ExtendedXMP marker segments in order, immediately following the + // StandardXMP. However, the JPEG standard does not require preservation of marker segment order. 
A + // robust JPEG reader should tolerate the marker segments in any order." + // + // otoh the probability that an image will have more than 128k of metadata is rather low... + // so the probability that it will have > 128k, and be in the wrong order is very low... + + if ( $len['offset'] !== $this->extendedXMPOffset ) { + wfDebugLog('XMP', __METHOD__ . 'Ignoring XMPExtended block due to wrong order. (Offset was ' + . $len['offset'] . ' but expected ' . $this->extendedXMPOffset . ')'); + return false; + } + + if ( $len['offset'] === 0 ) { + // if we're starting the extended block, we've probably already + // done the XMPStandard block, so reset. + $this->resetXMLParser(); + } + + $this->extendedXMPOffset += $len['length']; + + $actualContent = substr( $content, 40 ); + + if ( $this->extendedXMPOffset === strlen( $actualContent ) ) { + $atEnd = true; + } else { + $atEnd = false; + } + + wfDebugLog('XMP', __METHOD__ . 'Parsing a XMPExtended block'); + return $this->parse( $actualContent, $atEnd ); + } + + /** + * Character data handler + * Called whenever character data is found in the xmp document. + * + * does nothing if we're in MODE_IGNORE or if the data is whitespace + * throws an error if we're not in MODE_SIMPLE (as we're not allowed to have character + * data in the other modes). + * + * As an example, this happens when we encounter XMP like: + * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio> + * and are processing the 0/10 bit. 
+	 *
+	 * @param $parser XMLParser reference to the xml parser
+	 * @param $data String Character data
+	 * @throws MWException on invalid data
+	 */
+	function char( $parser, $data ) {
+
+		$data = trim( $data );
+		if ( $data === "" ) {
+			return;
+		}
+
+		if ( !isset( $this->mode[0] ) ) {
+			throw new MWException( 'Unexpected character data before first rdf:Description element' );
+		}
+
+		if ( $this->mode[0] === self::MODE_IGNORE ) return;
+
+		if ( $this->mode[0] !== self::MODE_SIMPLE
+			&& $this->mode[0] !== self::MODE_QDESC
+		) {
+			throw new MWException( 'character data where not expected. (mode ' . $this->mode[0] . ')' );
+		}
+
+		// to check, how does this handle w.s.
+		// (each chunk is trimmed on its own before being appended)
+		if ( $this->charContent === false ) {
+			$this->charContent = $data;
+		} else {
+			$this->charContent .= $data;
+		}
+
+	}
+
+	/** When we hit a closing element in MODE_IGNORE
+	 * Check to see if this is the element we started to ignore,
+	 * in which case we get out of MODE_IGNORE
+	 *
+	 * @param $elm String Namespace of element followed by a space and then tag name of element.
+	 */
+	private function endElementModeIgnore ( $elm ) {
+
+		if ( $this->curItem[0] === $elm ) {
+			array_shift( $this->curItem );
+			array_shift( $this->mode );
+		}
+
+	}
+
+	/**
+	 * Hit a closing element when in MODE_SIMPLE.
+	 * This generally means that we finished processing a
+	 * property value, and now have to save the result to the
+	 * results array
+	 *
+	 * For example, when processing:
+	 * <exif:DigitalZoomRatio>0/10</exif:DigitalZoomRatio>
+	 * this deals with when we hit </exif:DigitalZoomRatio>.
+	 *
+	 * Or it could be if we hit the end element of a property
+	 * of a compound data structure (like a member of an array).
+	 *
+	 * @param $elm String namespace, space, and tag name.
+	 */
+	private function endElementModeSimple ( $elm ) {
+		if ( $this->charContent !== false ) {
+			if ( $this->processingArray ) {
+				// if we're processing an array, use the original element
+				// name instead of rdf:li.
+				list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
+			} else {
+				list( $ns, $tag ) = explode( ' ', $elm, 2 );
+			}
+			$this->saveValue( $ns, $tag, $this->charContent );
+
+			$this->charContent = false; // reset
+		}
+		array_shift( $this->curItem );
+		array_shift( $this->mode );
+
+	}
+
+	/**
+	 * Hit a closing element in MODE_STRUCT, MODE_SEQ, MODE_BAG
+	 * generally means we've finished processing a nested structure.
+	 * resets some internal variables to indicate that.
+	 *
+	 * Note this means we hit the </closing element> not the </rdf:Seq>.
+	 *
+	 * For example, when processing:
+	 * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li>
+	 * </rdf:Seq> </exif:ISOSpeedRatings>
+	 *
+	 * This method is called when we hit the </exif:ISOSpeedRatings> tag.
+	 *
+	 * @param $elm String namespace . space . tag name.
+	 * @throws MWException if the closing tag does not match the current item
+	 */
+	private function endElementNested( $elm ) {
+
+		/* cur item must be the same as $elm, unless if in MODE_STRUCT
+		   in which case it could also be rdf:Description */
+		if ( $this->curItem[0] !== $elm
+			&& !( $elm === self::NS_RDF . ' Description'
+				&& $this->mode[0] === self::MODE_STRUCT )
+		) {
+			throw new MWException( "nesting mismatch. got a </$elm> but expected a </" . $this->curItem[0] . '>' );
+		}
+
+		// Validate structures.
+		list( $ns, $tag ) = explode( ' ', $elm, 2 );
+		if ( isset( $this->items[$ns][$tag]['validate'] ) ) {
+
+			$info =& $this->items[$ns][$tag];
+			$finalName = isset( $info['map_name'] )
+				? $info['map_name'] : $tag;
+
+			// A plain string names a static method of XMPValidate.
+			$validate = is_array( $info['validate'] ) ? $info['validate']
+				: array( 'XMPValidate', $info['validate'] );
+
+			if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
+				// This can happen if all the members of the struct failed validation.
+				wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> has no valid members." );
+
+			} elseif ( is_callable( $validate ) ) {
+				$val =& $this->results['xmp-' . $info['map_group']][$finalName];
+				call_user_func_array( $validate, array( $info, &$val, false ) );
+				if ( is_null( $val ) ) {
+					// the idea being the validation function will unset the variable if
+					// its invalid.
+					wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
+					unset( $this->results['xmp-' . $info['map_group']][$finalName] );
+				}
+			} else {
+				wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
+					. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
+			}
+		}
+
+		array_shift( $this->curItem );
+		array_shift( $this->mode );
+		$this->ancestorStruct = false;
+		$this->processingArray = false;
+		$this->itemLang = false;
+	}
+
+	/**
+	 * Hit a closing element in MODE_LI (either rdf:Seq, or rdf:Bag )
+	 * Add information about what type of element this is.
+	 *
+	 * Note we still have to hit the outer </property>
+	 *
+	 * For example, when processing:
+	 * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li>
+	 * </rdf:Seq> </exif:ISOSpeedRatings>
+	 *
+	 * This method is called when we hit the </rdf:Seq>.
+	 * (For comparison, we call endElementModeSimple when we
+	 * hit the </rdf:li>)
+	 *
+	 * @param $elm String namespace . ' ' . element name
+	 * @throws MWException if $elm is not rdf:Seq, rdf:Bag or rdf:Alt
+	 */
+	private function endElementModeLi( $elm ) {
+
+		list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
+		$info = $this->items[$ns][$tag];
+		$finalName = isset( $info['map_name'] )
+			? $info['map_name'] : $tag;
+
+		array_shift( $this->mode );
+
+		if ( !isset( $this->results['xmp-' . $info['map_group']][$finalName] ) ) {
+			wfDebugLog( 'XMP', __METHOD__ . " Empty compund element $finalName." );
+			return;
+		}
+
+		if ( $elm === self::NS_RDF . ' Seq' ) {
+			$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ol';
+		} elseif ( $elm === self::NS_RDF . ' Bag' ) {
+			$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'ul';
+		} elseif ( $elm === self::NS_RDF . ' Alt' ) {
+			// extra if needed as you could theoretically have a non-language alt.
+			if ( $info['mode'] === self::MODE_LANG ) {
+				$this->results['xmp-' . $info['map_group']][$finalName]['_type'] = 'lang';
+			}
+
+		} else {
+			throw new MWException( __METHOD__ . " expected </rdf:seq> or </rdf:bag> but instead got $elm." );
+		}
+	}
+
+	/**
+	 * End element while in MODE_QDESC
+	 * mostly when ending an element when we have a simple value
+	 * that has qualifiers.
+	 *
+	 * Qualifiers aren't all that common, and we don't do anything
+	 * with them.
+	 *
+	 * @param $elm String namespace and element
+	 */
+	private function endElementModeQDesc( $elm ) {
+
+		if ( $elm === self::NS_RDF . ' value' ) {
+			list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
+			// NOTE(review): charContent is not reset here, so when the
+			// MODE_QDESC was pushed by an inner rdf:Description, the
+			// enclosing element's endElementModeSimple() appears to save the
+			// same content a second time. Confirm whether that is intended.
+			$this->saveValue( $ns, $tag, $this->charContent );
+			return;
+		} else {
+			array_shift( $this->mode );
+			array_shift( $this->curItem );
+		}
+
+
+	}
+
+	/**
+	 * Handler for hitting a closing element.
+	 *
+	 * generally just calls a helper function depending on what
+	 * mode we're in.
+	 *
+	 * Ignores the outer wrapping elements that are optional in
+	 * xmp and have no meaning.
+	 *
+	 * @param $parser XMLParser
+	 * @param $elm String namespace . ' ' . element name
+	 * @throws MWException if there is no current mode, no current item
+	 *  (outside MODE_INITIAL), or an unexpected element ends in MODE_INITIAL
+	 */
+	function endElement( $parser, $elm ) {
+		if ( $elm === ( self::NS_RDF . ' RDF' )
+			|| $elm === 'adobe:ns:meta/ xmpmeta'
+			|| $elm === 'adobe:ns:meta/ xapmeta' )
+		{
+			// ignore these.
+			return;
+		}
+
+		if ( $elm === self::NS_RDF . ' type' ) {
+			// these aren't really supported properly yet.
+			// However, it appears they almost never used.
+			wfDebugLog( 'XMP', __METHOD__ . ' encountered <rdf:type>' );
+		}
+
+		if ( strpos( $elm, ' ' ) === false ) {
+			// This probably shouldn't happen.
+			// However, there is a bug in an adobe product
+			// that forgets the namespace on some things.
+			// (Luckily they are unimportant things).
+			wfDebugLog( 'XMP', __METHOD__ . " Encountered </$elm> which has no namespace. Skipping." );
+			return;
+		}
+
+		// Count the mode stack itself. Counting $this->mode[0] (a scalar)
+		// only detected an empty stack by accident, via count( null ).
+		if ( count( $this->mode ) === 0 ) {
+			// This should never ever happen and means
+			// there is a pretty major bug in this class.
+			throw new MWException( 'Encountered end element with no mode' );
+		}
+
+		if ( count( $this->curItem ) == 0 && $this->mode[0] !== self::MODE_INITIAL ) {
+			// just to be paranoid. Should always have a curItem, except for initially
+			// (aka during MODE_INITAL).
+			throw new MWException( "Hit end element </$elm> but no curItem" );
+		}
+
+		switch( $this->mode[0] ) {
+			case self::MODE_IGNORE:
+				$this->endElementModeIgnore( $elm );
+				break;
+			case self::MODE_SIMPLE:
+				$this->endElementModeSimple( $elm );
+				break;
+			case self::MODE_STRUCT:
+			case self::MODE_SEQ:
+			case self::MODE_BAG:
+			case self::MODE_LANG:
+			case self::MODE_BAGSTRUCT:
+				$this->endElementNested( $elm );
+				break;
+			case self::MODE_INITIAL:
+				if ( $elm === self::NS_RDF . ' Description' ) {
+					array_shift( $this->mode );
+				} else {
+					throw new MWException( 'Element ended unexpectedly while in MODE_INITIAL' );
+				}
+				break;
+			case self::MODE_LI:
+			case self::MODE_LI_LANG:
+				$this->endElementModeLi( $elm );
+				break;
+			case self::MODE_QDESC:
+				$this->endElementModeQDesc( $elm );
+				break;
+			default:
+				wfDebugLog( 'XMP', __METHOD__ . " no mode (elm = $elm)" );
+				break;
+		}
+	}
+
+	/**
+	 * Hit an opening element while in MODE_IGNORE
+	 *
+	 * XMP is extensible, so ignore any tag we don't understand.
+	 *
+	 * Mostly ignores, unless we encounter the element that we are ignoring.
+	 * in which case we add it to the item stack, so we can ignore things
+	 * that are nested, correctly.
+	 *
+	 * @param $elm String namespace . ' ' . tag name
+	 */
+	private function startElementModeIgnore( $elm ) {
+		if ( $elm === $this->curItem[0] ) {
+			array_unshift( $this->curItem, $elm );
+			array_unshift( $this->mode, self::MODE_IGNORE );
+		}
+	}
+
+	/**
+	 * Start element in MODE_BAG (unordered array)
+	 * this should always be <rdf:Bag>
+	 *
+	 * @param $elm String namespace . ' ' . 
tag
+	 * @throws MWException if we have an element that's not <rdf:Bag>
+	 */
+	private function startElementModeBag( $elm ) {
+		if ( $elm === self::NS_RDF . ' Bag' ) {
+			array_unshift( $this->mode, self::MODE_LI );
+		} else {
+			throw new MWException( "Expected <rdf:Bag> but got $elm." );
+		}
+
+	}
+
+	/**
+	 * Start element in MODE_SEQ (ordered array)
+	 * this should always be <rdf:Seq>
+	 *
+	 * An <rdf:Bag> is tolerated as well and treated like a Seq (bug 27105).
+	 *
+	 * @param $elm String namespace . ' ' . tag
+	 * @throws MWException if we have an element that's neither <rdf:Seq> nor <rdf:Bag>
+	 */
+	private function startElementModeSeq( $elm ) {
+		if ( $elm === self::NS_RDF . ' Seq' ) {
+			array_unshift( $this->mode, self::MODE_LI );
+		} elseif ( $elm === self::NS_RDF . ' Bag' ) {
+			# bug 27105
+			wfDebugLog( 'XMP', __METHOD__ . ' Expected an rdf:Seq, but got an rdf:Bag. Pretending'
+				. ' it is a Seq, since some buggy software is known to screw this up.' );
+			array_unshift( $this->mode, self::MODE_LI );
+		} else {
+			throw new MWException( "Expected <rdf:Seq> but got $elm." );
+		}
+
+	}
+
+	/**
+	 * Start element in MODE_LANG (language alternative)
+	 * this should always be <rdf:Alt>
+	 *
+	 * This tag tends to be used for metadata like describe this
+	 * picture, which can be translated into multiple languages.
+	 *
+	 * XMP supports non-linguistic alternative selections,
+	 * which are really only used for thumbnails, which
+	 * we don't care about.
+	 *
+	 * @param $elm String namespace . ' ' . tag
+	 * @throws MWException if we have an element that's not <rdf:Alt>
+	 */
+	private function startElementModeLang( $elm ) {
+		if ( $elm === self::NS_RDF . ' Alt' ) {
+			array_unshift( $this->mode, self::MODE_LI_LANG );
+		} else {
+			// The message has to name the element this mode really expects.
+			throw new MWException( "Expected <rdf:Alt> but got $elm." );
+		}
+
+	}
+
+	/**
+	 * Handle an opening element when in MODE_SIMPLE
+	 *
+	 * This should not happen often. This is for if a simple element
+	 * already opened has a child element. Could happen for a
+	 * qualified element.
+	 *
+	 * For example:
+	 * <exif:DigitalZoomRatio><rdf:Description><rdf:value>0/10</rdf:value>
+	 * <foo:someQualifier>Bar</foo:someQualifier> </rdf:Description>
+	 * </exif:DigitalZoomRatio>
+	 *
+	 * This method is called when processing the <rdf:Description> element
+	 *
+	 * @param $elm String namespace and tag names separated by space.
+	 * @param $attribs Array Attributes of the element.
+	 * @throws MWException if the child element is <rdf:value>
+	 */
+	private function startElementModeSimple( $elm, $attribs ) {
+		if ( $elm === self::NS_RDF . ' Description' ) {
+			// If this value has qualifiers
+			array_unshift( $this->mode, self::MODE_QDESC );
+			array_unshift( $this->curItem, $this->curItem[0] );
+
+			if ( isset( $attribs[self::NS_RDF . ' value'] ) ) {
+				list( $ns, $tag ) = explode( ' ', $this->curItem[0], 2 );
+				$this->saveValue( $ns, $tag, $attribs[self::NS_RDF . ' value'] );
+			}
+		} elseif ( $elm === self::NS_RDF . ' value' ) {
+			// This should not be here.
+			throw new MWException( __METHOD__ . ' Encountered <rdf:value> where it was unexpected.' );
+
+		} else {
+			// something else we don't recognize, like a qualifier maybe.
+			wfDebugLog( 'XMP', __METHOD__ . " Encountered element <$elm> where only expecting character data as value of " . $this->curItem[0] );
+			array_unshift( $this->mode, self::MODE_IGNORE );
+			array_unshift( $this->curItem, $elm );
+
+		}
+
+	}
+
+	/**
+	 * Start an element when in MODE_QDESC.
+	 * This generally happens when a simple element has an inner
+	 * rdf:Description to hold qualifier elements.
+	 *
+	 * For example in:
+	 * <exif:DigitalZoomRatio><rdf:Description><rdf:value>0/10</rdf:value>
+	 * <foo:someQualifier>Bar</foo:someQualifier> </rdf:Description>
+	 * </exif:DigitalZoomRatio>
+	 * Called when processing the <rdf:value> or <foo:someQualifier>.
+	 *
+	 * @param $elm String namespace and tag name separated by a space.
+	 *
+	 */
+	private function startElementModeQDesc( $elm ) {
+		if ( $elm === self::NS_RDF . ' value' ) {
+			return; // do nothing
+		} else {
+			// otherwise its a qualifier, which we ignore
+			array_unshift( $this->mode, self::MODE_IGNORE );
+			array_unshift( $this->curItem, $elm );
+		}
+	}
+
+	/**
+	 * Starting an element when in MODE_INITIAL
+	 * This usually happens when we hit an element inside
+	 * the outer rdf:Description
+	 *
+	 * This is generally where most properties start.
+	 *
+	 * @param $ns String Namespace
+	 * @param $tag String tag name (without namespace prefix)
+	 * @param $attribs Array array of attributes
+	 * @throws MWException if character data is still pending when the tag opens
+	 */
+	private function startElementModeInitial( $ns, $tag, $attribs ) {
+		if ( $ns !== self::NS_RDF ) {
+
+			if ( isset( $this->items[$ns][$tag] ) ) {
+				if ( isset( $this->items[$ns][$tag]['structPart'] ) ) {
+					// If this element is supposed to appear only as
+					// a child of a structure, but appears here (not as
+					// a child of a struct), then something weird is
+					// happening, so ignore this element and its children.
+
+					wfDebugLog( 'XMP', "Encountered <$ns:$tag> outside"
+						. " of its expected parent. Ignoring." );
+
+					array_unshift( $this->mode, self::MODE_IGNORE );
+					array_unshift( $this->curItem, $ns . ' ' . $tag );
+					return;
+				}
+				$mode = $this->items[$ns][$tag]['mode'];
+				array_unshift( $this->mode, $mode );
+				array_unshift( $this->curItem, $ns . ' ' . $tag );
+				if ( $mode === self::MODE_STRUCT ) {
+					$this->ancestorStruct = isset( $this->items[$ns][$tag]['map_name'] )
+						? $this->items[$ns][$tag]['map_name'] : $tag;
+				}
+				if ( $this->charContent !== false ) {
+					// Something weird.
+					// Should not happen in valid XMP.
+					throw new MWException( 'tag nested in non-whitespace characters.' );
+				}
+			} else {
+				// This element is not on our list of allowed elements so ignore.
+				wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
+				array_unshift( $this->mode, self::MODE_IGNORE );
+				array_unshift( $this->curItem, $ns . ' ' . $tag );
+				return;
+			}
+
+		}
+		// process attributes
+		$this->doAttribs( $attribs );
+	}
+
+	/**
+	 * Hit an opening element when in a Struct (MODE_STRUCT)
+	 * This is generally for fields of a compound property.
+	 *
+	 * Example of a struct (abbreviated; flash has more properties):
+	 *
+	 * <exif:Flash> <rdf:Description> <exif:Fired>True</exif:Fired>
+	 *  <exif:Mode>1</exif:Mode></rdf:Description></exif:Flash>
+	 *
+	 * or:
+	 *
+	 * <exif:Flash rdf:parseType='Resource'> <exif:Fired>True</exif:Fired>
+	 *  <exif:Mode>1</exif:Mode></exif:Flash>
+	 *
+	 * @param $ns String namespace
+	 * @param $tag String tag name (no ns)
+	 * @param $attribs Array array of attribs w/ values.
+	 * @throws MWException if the tag is not an allowed child of the current
+	 *  struct, or if character data is still pending when the tag opens
+	 */
+	private function startElementModeStruct( $ns, $tag, $attribs ) {
+		if ( $ns !== self::NS_RDF ) {
+
+			if ( isset( $this->items[$ns][$tag] ) ) {
+				if ( isset( $this->items[$ns][$this->ancestorStruct]['children'] )
+					&& !isset( $this->items[$ns][$this->ancestorStruct]['children'][$tag] ) )
+				{
+					// This assumes that we don't have inter-namespace nesting
+					// which we don't in all the properties we're interested in.
+					throw new MWException( " <$tag> appeared nested in <" . $this->ancestorStruct
+						. "> where it is not allowed." );
+				}
+				array_unshift( $this->mode, $this->items[$ns][$tag]['mode'] );
+				array_unshift( $this->curItem, $ns . ' ' . $tag );
+				if ( $this->charContent !== false ) {
+					// Something weird.
+					// Should not happen in valid XMP.
+					throw new MWException( "tag <$tag> nested in non-whitespace characters (" . $this->charContent . ")." );
+				}
+			} else {
+				// Unknown member of the struct: ignore it and its children.
+				// The item pushed has to be the real element name, because
+				// endElementModeIgnore() compares it against the closing tag
+				// to find the way out of MODE_IGNORE again.
+				array_unshift( $this->mode, self::MODE_IGNORE );
+				array_unshift( $this->curItem, $ns . ' ' . $tag );
+				return;
+			}
+
+		}
+
+		if ( $ns === self::NS_RDF && $tag === 'Description' ) {
+			$this->doAttribs( $attribs );
+			array_unshift( $this->mode, self::MODE_STRUCT );
+			array_unshift( $this->curItem, $this->curItem[0] );
+		}
+	}
+
+	/**
+	 * opening element in MODE_LI
+	 * process elements of arrays.
+	 *
+	 * Example:
+	 * <exif:ISOSpeedRatings> <rdf:Seq> <rdf:li>64</rdf:li>
+	 * </rdf:Seq> </exif:ISOSpeedRatings>
+	 * This method is called when we hit the <rdf:li> element.
+	 *
+	 * @param $elm String: namespace . ' ' . tagname
+	 * @param $attribs Array: Attributes. (needed for BAGSTRUCTS)
+	 * @throws MWException if gets a tag other than <rdf:li>, or if the
+	 *  mode/item stacks do not contain the expected parent entries
+	 */
+	private function startElementModeLi( $elm, $attribs ) {
+		if ( $elm !== self::NS_RDF . ' li' ) {
+			throw new MWException( "<rdf:li> expected but got $elm." );
+		}
+
+		if ( !isset( $this->mode[1] ) ) {
+			// This should never ever ever happen. Checking for it
+			// to be paranoid.
+			throw new MWException( 'In mode Li, but no 2xPrevious mode!' );
+		}
+
+		if ( $this->mode[1] === self::MODE_BAGSTRUCT ) {
+			// This list item contains a compound (STRUCT) value.
+			array_unshift( $this->mode, self::MODE_STRUCT );
+			array_unshift( $this->curItem, $elm );
+			$this->processingArray = true;
+
+			if ( !isset( $this->curItem[1] ) ) {
+				// be paranoid.
+				throw new MWException( 'Can not find parent of BAGSTRUCT.' );
+			}
+			// Split on the first space only, like everywhere else in this class.
+			list( $curNS, $curTag ) = explode( ' ', $this->curItem[1], 2 );
+			$this->ancestorStruct = isset( $this->items[$curNS][$curTag]['map_name'] )
+				? $this->items[$curNS][$curTag]['map_name'] : $curTag;
+
+			$this->doAttribs( $attribs );
+
+		} else {
+			// Normal BAG or SEQ containing simple values.
+			array_unshift( $this->mode, self::MODE_SIMPLE );
+			// need to add curItem[0] on again since one is for the specific item
+			// and one is for the entire group.
+			array_unshift( $this->curItem, $this->curItem[0] );
+			$this->processingArray = true;
+		}
+
+	}
+
+	/**
+	 * Opening element in MODE_LI_LANG.
+	 * process elements of language alternatives
+	 *
+	 * Example:
+	 * <dc:title> <rdf:Alt> <rdf:li xml:lang="x-default">My house
+	 * </rdf:li> </rdf:Alt> </dc:title>
+	 *
+	 * This method is called when we hit the <rdf:li> element.
+	 *
+	 * @param $elm String namespace . ' ' . 
tag
+	 * @param $attribs array attributes of the element (most importantly xml:lang)
+	 * @throws MWException if gets a tag other than <rdf:li>, or if xml:lang
+	 *  is missing or not made of at least two letters, digits or dashes
+	 */
+	private function startElementModeLiLang( $elm, $attribs ) {
+		$langKey = self::NS_XML . ' lang';
+
+		if ( $elm !== self::NS_RDF . ' li' ) {
+			throw new MWException( __METHOD__ . " <rdf:li> expected but got $elm." );
+		}
+
+		$hasUsableLang = isset( $attribs[$langKey] )
+			&& preg_match( '/^[-A-Za-z0-9]{2,}$/D', $attribs[$langKey] );
+		if ( !$hasUsableLang ) {
+			throw new MWException( __METHOD__
+				. " <rdf:li> did not contain, or has invalid xml:lang attribute in lang alternative" );
+		}
+
+		// Language codes are compared case-insensitively, so store lower case.
+		$this->itemLang = strtolower( $attribs[$langKey] );
+		$this->processingArray = true;
+
+		// The group's item is pushed a second time: one entry stands for
+		// this particular <rdf:li>, the other for the whole alternative.
+		array_unshift( $this->mode, self::MODE_SIMPLE );
+		array_unshift( $this->curItem, $this->curItem[0] );
+	}
+
+	/**
+	 * Handler for an opening element.
+	 * Dispatches to the helper that belongs to the mode we are in, after
+	 * taking care of the optional wrapper elements.
+	 *
+	 * @param $parser XMLParser
+	 * @param $elm String namespace <space> element
+	 * @param $attribs Array attribute name => value
+	 * @throws MWException if there is no current mode or the mode is unknown
+	 */
+	function startElement( $parser, $elm, $attribs ) {
+		// The outer wrappers carry no information at all.
+		$wrappers = array(
+			self::NS_RDF . ' RDF',
+			'adobe:ns:meta/ xmpmeta',
+			'adobe:ns:meta/ xapmeta',
+		);
+		if ( in_array( $elm, $wrappers, true ) ) {
+			return;
+		}
+
+		if ( $elm === self::NS_RDF . ' Description' ) {
+			if ( !count( $this->mode ) ) {
+				// This is the outermost rdf:Description.
+				array_unshift( $this->mode, self::MODE_INITIAL );
+			}
+		} elseif ( $elm === self::NS_RDF . ' type' ) {
+			// rdf:type is not handled properly. It is mentioned on page 25
+			// of part 1 of the xmp standard, but files using it seem to be
+			// rare, and exiv2 and exiftool do not appear to support it
+			// either.
+			wfDebugLog( 'XMP', __METHOD__ . ' Encountered <rdf:type> which isn\'t currently supported' );
+		}
+
+		if ( strpos( $elm, ' ' ) === false ) {
+			// No namespace part. This probably shouldn't happen.
+			wfDebugLog( 'XMP', __METHOD__ . " Encountered <$elm> which has no namespace. Skipping." );
+			return;
+		}
+
+		if ( !count( $this->mode ) ) {
+			// This should not happen.
+			throw new MWException( "Error extracting XMP, encountered <$elm> with no mode" );
+		}
+
+		list( $ns, $tag ) = explode( ' ', $elm, 2 );
+		$currentMode = $this->mode[0];
+
+		switch ( $currentMode ) {
+			// Helpers that want the namespace and tag separately.
+			case self::MODE_INITIAL:
+				$this->startElementModeInitial( $ns, $tag, $attribs );
+				break;
+			case self::MODE_STRUCT:
+				$this->startElementModeStruct( $ns, $tag, $attribs );
+				break;
+
+			// Helpers that want the full element name and the attributes.
+			case self::MODE_SIMPLE:
+				$this->startElementModeSimple( $elm, $attribs );
+				break;
+			case self::MODE_LI:
+				$this->startElementModeLi( $elm, $attribs );
+				break;
+			case self::MODE_LI_LANG:
+				$this->startElementModeLiLang( $elm, $attribs );
+				break;
+
+			// Helpers that only want the full element name.
+			case self::MODE_IGNORE:
+				$this->startElementModeIgnore( $elm );
+				break;
+			case self::MODE_BAG:
+			case self::MODE_BAGSTRUCT:
+				$this->startElementModeBag( $elm );
+				break;
+			case self::MODE_SEQ:
+				$this->startElementModeSeq( $elm );
+				break;
+			case self::MODE_LANG:
+				$this->startElementModeLang( $elm );
+				break;
+			case self::MODE_QDESC:
+				$this->startElementModeQDesc( $elm );
+				break;
+
+			default:
+				throw new MWException( 'StartElement in unknown mode: ' . $currentMode );
+		}
+	}
+
+	/**
+	 * Process attributes.
+	 * Simple values can be stored as either a tag or attribute
+	 *
+	 * Often the initial <rdf:Description> tag just has all the simple
+	 * properties as attributes.
+	 *
+	 * Example:
+	 * <rdf:Description rdf:about="" xmlns:exif="http://ns.adobe.com/exif/1.0/" exif:DigitalZoomRatio="0/10">
+	 *
+	 * @param $attribs Array attribute=>value array.
+	 */
+	private function doAttribs( $attribs ) {
+		// rdf:parseType comes first, since it changes how the remaining
+		// attributes are interpreted: parseType="Resource" on a simple
+		// element is equivalent to having an inner rdf:Description.
+		$parseTypeKey = self::NS_RDF . ' parseType';
+		if ( $this->mode[0] === self::MODE_SIMPLE
+			&& isset( $attribs[$parseTypeKey] )
+			&& $attribs[$parseTypeKey] === 'Resource' )
+		{
+			$this->mode[0] = self::MODE_QDESC;
+		}
+
+		foreach ( $attribs as $name => $val ) {
+			if ( strpos( $name, ' ' ) === false ) {
+				// This shouldn't happen, but so far some old software forgets
+				// the namespace on rdf:about.
+				wfDebugLog( 'XMP', __METHOD__ . ' Encountered non-namespaced attribute: '
+					. " $name=\"$val\". Skipping. " );
+				continue;
+			}
+
+			list( $ns, $tag ) = explode( ' ', $name, 2 );
+
+			if ( $ns === self::NS_RDF ) {
+				// rdf:resource is for urls, and rdf:value is a weird way of
+				// just putting the contents. Other rdf attributes are skipped.
+				if ( in_array( $tag, array( 'value', 'resource' ), true ) ) {
+					$this->char( $this->xmlParser, $val );
+				}
+				continue;
+			}
+
+			if ( !isset( $this->items[$ns][$tag] ) ) {
+				wfDebugLog( 'XMP', __METHOD__ . " Ignoring unrecognized element <$ns:$tag>." );
+				continue;
+			}
+
+			if ( $this->mode[0] === self::MODE_SIMPLE ) {
+				throw new MWException( __METHOD__
+					. " $ns:$tag found as attribute where not allowed" );
+			}
+			$this->saveValue( $ns, $tag, $val );
+		}
+	}
+
+	/**
+	 * Store an extracted value in the results array, after running the
+	 * validation callback of the item (if it has one).
+	 *
+	 * Where the value ends up depends on $this->ancestorStruct,
+	 * $this->processingArray and $this->itemLang, in addition to $tag.
+	 *
+	 * @param $ns String namespace of tag this is for
+	 * @param $tag String tag name
+	 * @param $val String value to save
+	 */
+	private function saveValue( $ns, $tag, $val ) {
+		$info =& $this->items[$ns][$tag];
+		$finalName = $tag;
+		if ( isset( $info['map_name'] ) ) {
+			$finalName = $info['map_name'];
+		}
+
+		if ( isset( $info['validate'] ) ) {
+			// A plain string names a static method of XMPValidate.
+			$validate = $info['validate'];
+			if ( !is_array( $validate ) ) {
+				$validate = array( 'XMPValidate', $validate );
+			}
+
+			if ( !is_callable( $validate ) ) {
+				wfDebugLog( 'XMP', __METHOD__ . " Validation function for $finalName ("
+					. $validate[0] . '::' . $validate[1] . '()) is not callable.' );
+			} else {
+				// $val goes in by reference rather than coming back as a
+				// return value, to stay consistent with how structures are
+				// validated. A null $val afterwards means it was rejected.
+				call_user_func_array( $validate, array( $info, &$val, true ) );
+				if ( is_null( $val ) ) {
+					wfDebugLog( 'XMP', __METHOD__ . " <$ns:$tag> failed validation." );
+					return;
+				}
+			}
+		}
+
+		$group = 'xmp-' . $info['map_group'];
+
+		if ( $this->ancestorStruct ) {
+			if ( $this->processingArray ) {
+				// Both an array and a struct ( self::MODE_BAGSTRUCT ).
+				$this->results[$group][$this->ancestorStruct][][$finalName] = $val;
+			} else {
+				$this->results[$group][$this->ancestorStruct][$finalName] = $val;
+			}
+		} elseif ( $this->processingArray ) {
+			if ( $this->itemLang !== false ) {
+				// language alternative, keyed by language
+				$this->results[$group][$finalName][$this->itemLang] = $val;
+			} else {
+				// normal array
+				$this->results[$group][$finalName][] = $val;
+			}
+		} else {
+			$this->results[$group][$finalName] = $val;
+		}
+	}
+}
diff --git a/includes/media/XMPInfo.php b/includes/media/XMPInfo.php
new file mode 100644
index 00000000..1d580ff7
--- /dev/null
+++ b/includes/media/XMPInfo.php
@@ -0,0 +1,1139 @@
+<?php
+/**
+* This class is just a container for a big array
+* used by XMPReader to determine which XMP items to
+* extract.
+*/
+class XMPInfo {
+
+	/**
+	 * Get the items array, running the XMPGetInfo hook the first time.
+	 *
+	 * @return Array XMP item configuration array.
+	 */
+	public static function getItems ( ) {
+		if ( self::$ranHooks ) {
+			return self::$items;
+		}
+
+		// The hook is there for custom metadata extensions. For example, a
+		// medical wiki might want to decode DICOM xmp properties.
+		wfRunHooks( 'XMPGetInfo', array( &self::$items ) );
+		// Only want to do this once.
+		self::$ranHooks = true;
+
+		return self::$items;
+	}
+
+	static private $ranHooks = false;
+
+	/**
+	 * XMPInfo::$items keeps a list of all the items
+	 * we are interested to extract, as well as
+	 * information about the item like what type
+	 * it is.
+	 *
+	 * Format is an array of namespaces,
+	 * each containing an array of tags
+	 * each tag is an array of information about the
+	 * tag, including:
+	 * * map_group - what group (used for precedence during conflicts)
+	 * * mode - What type of item (self::MODE_SIMPLE usually, see above for all values)
+	 * * validate - method to validate input. Could also post-process the input. A string value is assumed to be a static method of XMPValidate. Can also take a array( 'className', 'methodName' ).
+	 * * choices - array of potential values (format of 'value' => true ). Only used with validateClosed
+	 * * rangeLow and rangeHigh - alternative to choices for numeric ranges. Again for validateClosed only.
+	 * * children - for MODE_STRUCT items, allowed children.
+	 * * structPart - Indicates that this element can only appear as a member of a structure.
+ * + * currently this just has a bunch of exif values as this class is only half-done + */ + + static private $items = array( + 'http://ns.adobe.com/exif/1.0/' => array( + 'ApertureValue' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'BrightnessValue' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'CompressedBitsPerPixel' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'DigitalZoomRatio' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'ExposureBiasValue' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'ExposureIndex' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'ExposureTime' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'FlashEnergy' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational', + ), + 'FNumber' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'FocalLength' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'FocalPlaneXResolution' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'FocalPlaneYResolution' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'GPSAltitude' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational', + ), + 'GPSDestBearing' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + 
), + 'GPSDestDistance' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'GPSDOP' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'GPSImgDirection' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'GPSSpeed' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'GPSTrack' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'MaxApertureValue' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'ShutterSpeedValue' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + 'SubjectDistance' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational' + ), + /* Flash */ + 'Flash' => array( + 'mode' => XMPReader::MODE_STRUCT, + 'children' => array( + 'Fired' => true, + 'Function' => true, + 'Mode' => true, + 'RedEyeMode' => true, + 'Return' => true, + ), + 'validate' => 'validateFlash', + 'map_group' => 'exif', + ), + 'Fired' => array( + 'map_group' => 'exif', + 'validate' => 'validateBoolean', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'Function' => array( + 'map_group' => 'exif', + 'validate' => 'validateBoolean', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'Mode' => array( + 'map_group' => 'exif', + 'validate' => 'validateClosed', + 'mode' => XMPReader::MODE_SIMPLE, + 'choices' => array( '0' => true, '1' => true, + '2' => true, '3' => true ), + 'structPart'=> true, + ), + 'Return' => array( + 'map_group' => 'exif', + 'validate' => 'validateClosed', + 'mode' => XMPReader::MODE_SIMPLE, + 'choices' => array( '0' => true, + '2' => true, '3' => true ), + 
'structPart'=> true, + ), + 'RedEyeMode' => array( + 'map_group' => 'exif', + 'validate' => 'validateBoolean', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + /* End Flash */ + 'ISOSpeedRatings' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateInteger' + ), + /* end rational things */ + 'ColorSpace' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '1' => true, '65535' => true ), + ), + 'ComponentsConfiguration' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateClosed', + 'choices' => array( '1' => true, '2' => true, '3' => true, '4' => true, + '5' => true, '6' => true ) + ), + 'Contrast' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '0' => true, '1' => true, '2' => true ) + ), + 'CustomRendered' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '0' => true, '1' => true ) + ), + 'DateTimeOriginal' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateDate', + ), + 'DateTimeDigitized' => array( /* xmp:CreateDate */ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateDate', + ), + /* todo: there might be interesting information in + * exif:DeviceSettingDescription, but need to find an + * example + */ + 'ExifVersion' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'ExposureMode' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 2, + ), + 'ExposureProgram' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 8, + ), + 'FileSource' => array( + 'map_group' 
=> 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '3' => true ) + ), + 'FlashpixVersion' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'FocalLengthIn35mmFilm' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ), + 'FocalPlaneResolutionUnit' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '2' => true, '3' => true ), + ), + 'GainControl' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 4, + ), + /* this value is post-processed out later */ + 'GPSAltitudeRef' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '0' => true, '1' => true ), + ), + 'GPSAreaInformation' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'GPSDestBearingRef' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( 'T' => true, 'M' => true ), + ), + 'GPSDestDistanceRef' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( 'K' => true, 'M' => true, + 'N' => true ), + ), + 'GPSDestLatitude' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateGPS', + ), + 'GPSDestLongitude' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateGPS', + ), + 'GPSDifferential' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '0' => true, '1' => true ), + ), + 'GPSImgDirectionRef' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( 
'T' => true, 'M' => true ), + ), + 'GPSLatitude' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateGPS', + ), + 'GPSLongitude' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateGPS', + ), + 'GPSMapDatum' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'GPSMeasureMode' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '2' => true, '3' => true ) + ), + 'GPSProcessingMethod' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'GPSSatellites' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'GPSSpeedRef' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( 'K' => true, 'M' => true, + 'N' => true ), + ), + 'GPSStatus' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( 'A' => true, 'V' => true ) + ), + 'GPSTimeStamp' => array( + 'map_group' => 'exif', + // Note: in exif, GPSDateStamp does not include + // the time, where here it does. + 'map_name' => 'GPSDateStamp', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateDate', + ), + 'GPSTrackRef' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( 'T' => true, 'M' => true ) + ), + 'GPSVersionID' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'ImageUniqueID' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'LightSource' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + /* can't use a range, as it skips... 
*/ + 'choices' => array( '0' => true, '1' => true, + '2' => true, '3' => true, '4' => true, + '9' => true, '10' => true, '11' => true, + '12' => true, '13' => true, + '14' => true, '15' => true, + '17' => true, '18' => true, + '19' => true, '20' => true, + '21' => true, '22' => true, + '23' => true, '24' => true, + '255' => true, + ), + ), + 'MeteringMode' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 6, + 'choices' => array( '255' => true ), + ), + /* Pixel(X|Y)Dimension are rather useless, but for + * completeness since we do it with exif. + */ + 'PixelXDimension' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ), + 'PixelYDimension' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ), + 'Saturation' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 2, + ), + 'SceneCaptureType' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 3, + ), + 'SceneType' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '1' => true ), + ), + // Note, 6 is not valid SensingMethod. + 'SensingMethod' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 1, + 'rangeHigh' => 5, + 'choices' => array( '7' => true, 8 => true ), + ), + 'Sharpness' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 2, + ), + 'SpectralSensitivity' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + // This tag should perhaps be displayed to user better. 
+ 'SubjectArea' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateInteger', + ), + 'SubjectDistanceRange' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'rangeLow' => 0, + 'rangeHigh' => 3, + ), + 'SubjectLocation' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateInteger', + ), + 'UserComment' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_LANG, + ), + 'WhiteBalance' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '0' => true, '1' => true ) + ), + ), + 'http://ns.adobe.com/tiff/1.0/' => array( + 'Artist' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'BitsPerSample' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateInteger', + ), + 'Compression' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '1' => true, '6' => true ), + ), + /* this prop should not be used in XMP. 
dc:rights is the correct prop */ + 'Copyright' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_LANG, + ), + 'DateTime' => array( /* proper prop is xmp:ModifyDate */ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateDate', + ), + 'ImageDescription' => array( /* proper one is dc:description */ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_LANG, + ), + 'ImageLength' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ), + 'ImageWidth' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ), + 'Make' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'Model' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + /**** Do not extract this property + * It interferes with auto exif rotation. + * 'Orientation' => array( + * 'map_group' => 'exif', + * 'mode' => XMPReader::MODE_SIMPLE, + * 'validate' => 'validateClosed', + * 'choices' => array( '1' => true, '2' => true, '3' => true, '4' => true, 5 => true, + * '6' => true, '7' => true, '8' => true ), + *), + ******/ + 'PhotometricInterpretation' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '2' => true, '6' => true ), + ), + 'PlanerConfiguration' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '1' => true, '2' => true ), + ), + 'PrimaryChromaticities' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateRational', + ), + 'ReferenceBlackWhite' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateRational', + ), + 'ResolutionUnit' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '2' => true, '3' => 
true ), + ), + 'SamplesPerPixel' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + ), + 'Software' => array( /* see xmp:CreatorTool */ + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + /* ignore TransferFunction */ + 'WhitePoint' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateRational', + ), + 'XResolution' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational', + ), + 'YResolution' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRational', + ), + 'YCbCrCoefficients' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateRational', + ), + 'YCbCrPositioning' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateClosed', + 'choices' => array( '1' => true, '2' => true ), + ), + 'YCbCrSubSampling' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateClosed', + 'choices' => array( '1' => true, '2' => true ), + ), + ), + 'http://ns.adobe.com/exif/1.0/aux/' => array( + 'Lens' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'SerialNumber' => array( + 'map_group' => 'exif', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'OwnerName' => array( + 'map_group' => 'exif', + 'map_name' => 'CameraOwnerName', + 'mode' => XMPReader::MODE_SIMPLE, + ), + ), + 'http://purl.org/dc/elements/1.1/' => array( + 'title' => array( + 'map_group' => 'general', + 'map_name' => 'ObjectName', + 'mode' => XMPReader::MODE_LANG + ), + 'description' => array( + 'map_group' => 'general', + 'map_name' => 'ImageDescription', + 'mode' => XMPReader::MODE_LANG + ), + 'contributor' => array( + 'map_group' => 'general', + 'map_name' => 'dc-contributor', + 'mode' => XMPReader::MODE_BAG + ), + 'coverage' => array( + 'map_group' => 'general', + 
'map_name' => 'dc-coverage', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'creator' => array( + 'map_group' => 'general', + 'map_name' => 'Artist', //map with exif Artist, iptc byline (2:80) + 'mode' => XMPReader::MODE_SEQ, + ), + 'date' => array( + 'map_group' => 'general', + // Note, not mapped with other date properties, as this type of date is + // non-specific: "A point or period of time associated with an event in + // the lifecycle of the resource" + 'map_name' => 'dc-date', + 'mode' => XMPReader::MODE_SEQ, + 'validate' => 'validateDate', + ), + /* Do not extract dc:format, as we've got better ways to determine mimetype */ + 'identifier' => array( + 'map_group' => 'deprecated', + 'map_name' => 'Identifier', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'language' => array( + 'map_group' => 'general', + 'map_name' => 'LanguageCode', /* mapped with iptc 2:135 */ + 'mode' => XMPReader::MODE_BAG, + 'validate' => 'validateLangCode', + ), + 'publisher' => array( + 'map_group' => 'general', + 'map_name' => 'dc-publisher', + 'mode' => XMPReader::MODE_BAG, + ), + // for related images/resources + 'relation' => array( + 'map_group' => 'general', + 'map_name' => 'dc-relation', + 'mode' => XMPReader::MODE_BAG, + ), + 'rights' => array( + 'map_group' => 'general', + 'map_name' => 'Copyright', + 'mode' => XMPReader::MODE_LANG, + ), + // Note: source is not mapped with iptc source, since iptc + // source describes the source of the image in terms of a person + // who provided the image, where this is to describe an image that the + // current one is based on. 
+ 'source' => array( + 'map_group' => 'general', + 'map_name' => 'dc-source', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'subject' => array( + 'map_group' => 'general', + 'map_name' => 'Keywords', /* maps to iptc 2:25 */ + 'mode' => XMPReader::MODE_BAG, + ), + 'type' => array( + 'map_group' => 'general', + 'map_name' => 'dc-type', + 'mode' => XMPReader::MODE_BAG, + ), + ), + 'http://ns.adobe.com/xap/1.0/' => array( + 'CreateDate' => array( + 'map_group' => 'general', + 'map_name' => 'DateTimeDigitized', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateDate', + ), + 'CreatorTool' => array( + 'map_group' => 'general', + 'map_name' => 'Software', + 'mode' => XMPReader::MODE_SIMPLE + ), + 'Identifier' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + ), + 'Label' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'ModifyDate' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'DateTime', + 'validate' => 'validateDate', + ), + 'MetadataDate' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + // map_name to be consistent with other date names. + 'map_name' => 'DateTimeMetadata', + 'validate' => 'validateDate', + ), + 'Nickname' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'Rating' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateRating', + ), + ), + 'http://ns.adobe.com/xap/1.0/rights/' => array( + 'Certificate' => array( + 'map_group' => 'general', + 'map_name' => 'RightsCertificate', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'Marked' => array( + 'map_group' => 'general', + 'map_name' => 'Copyrighted', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateBoolean', + ), + 'Owner' => array( + 'map_group' => 'general', + 'map_name' => 'CopyrightOwner', + 'mode' => XMPReader::MODE_BAG, + ), + // this seems similar to dc:rights. 
+ 'UsageTerms' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_LANG, + ), + 'WebStatement' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + ), + // XMP media management. + 'http://ns.adobe.com/xap/1.0/mm/' => array( + // if we extract the exif UniqueImageID, might + // as well do this too. + 'OriginalDocumentID' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + // It might also be useful to do xmpMM:LastURL + // and xmpMM:DerivedFrom as you can potentially, + // get the url of this document/source for this + // document. However whats more likely is you'd + // get a file:// url for the path of the doc, + // which is somewhat of a privacy issue. + ), + 'http://creativecommons.org/ns#' => array( + 'license' => array( + 'map_name' => 'LicenseUrl', + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'morePermissions' => array( + 'map_name' => 'MorePermissionsUrl', + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'attributionURL' => array( + 'map_group' => 'general', + 'map_name' => 'AttributionUrl', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'attributionName' => array( + 'map_group' => 'general', + 'map_name' => 'PreferredAttributionName', + 'mode' => XMPReader::MODE_SIMPLE, + ), + ), + //Note, this property affects how jpeg metadata is extracted. 
+ 'http://ns.adobe.com/xmp/note/' => array( + 'HasExtendedXMP' => array( + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + ), + ), + /* Note, in iptc schemas, the legacy properties are denoted + * as deprecated, since other properties should used instead, + * and properties marked as deprecated in the standard are + * are marked as general here as they don't have replacements + */ + 'http://ns.adobe.com/photoshop/1.0/' => array( + 'City' => array( + 'map_group' => 'deprecated', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'CityDest', + ), + 'Country' => array( + 'map_group' => 'deprecated', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'CountryDest', + ), + 'State' => array( + 'map_group' => 'deprecated', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'ProvinceOrStateDest', + ), + 'DateCreated' => array( + 'map_group' => 'deprecated', + // marking as deprecated as the xmp prop preferred + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'DateTimeOriginal', + 'validate' => 'validateDate', + // note this prop is an XMP, not IPTC date + ), + 'CaptionWriter' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'Writer', + ), + 'Instructions' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'SpecialInstructions', + ), + 'TransmissionReference' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'OriginalTransmissionRef', + ), + 'AuthorsPosition' => array( + /* This corresponds with 2:85 + * By-line Title, which needs to be + * handled weirdly to correspond + * with iptc/exif. 
*/ + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE + ), + 'Credit' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'Source' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'Urgency' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'Category' => array( + // Note, this prop is deprecated, but in general + // group since it doesn't have a replacement. + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'iimCategory', + ), + 'SupplementalCategories' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + 'map_name' => 'iimSupplementalCategory', + ), + 'Headline' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE + ), + ), + 'http://iptc.org/std/Iptc4xmpCore/1.0/xmlns/' => array( + 'CountryCode' => array( + 'map_group' => 'deprecated', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'CountryCodeDest', + ), + 'IntellectualGenre' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + // Note, this is a six digit code. + // See: http://cv.iptc.org/newscodes/scene/ + // Since these aren't really all that common, + // we just show the number. + 'Scene' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + 'validate' => 'validateInteger', + 'map_name' => 'SceneCode', + ), + /* Note: SubjectCode should be an 8 ascii digits. + * it is not really an integer (has leading 0's, + * cannot have a +/- sign), but validateInteger + * will let it through. + */ + 'SubjectCode' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + 'map_name' => 'SubjectNewsCode', + 'validate' => 'validateInteger' + ), + 'Location' => array( + 'map_group' => 'deprecated', + 'mode' => XMPReader::MODE_SIMPLE, + 'map_name' => 'SublocationDest', + ), + 'CreatorContactInfo' => array( + /* Note this maps to 2:118 in iim + * (Contact) field. 
However those field + * types are slightly different - 2:118 + * is free form text field, where this + * is more structured. + */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_STRUCT, + 'map_name' => 'Contact', + 'children' => array( + 'CiAdrExtadr' => true, + 'CiAdrCity' => true, + 'CiAdrCtry' => true, + 'CiEmailWork' => true, + 'CiTelWork' => true, + 'CiAdrPcode' => true, + 'CiAdrRegion' => true, + 'CiUrlWork' => true, + ), + ), + 'CiAdrExtadr' => array( /* address */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'CiAdrCity' => array( /* city */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'CiAdrCtry' => array( /* country */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'CiEmailWork' => array( /* email (possibly separated by ',') */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'CiTelWork' => array( /* telephone */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'CiAdrPcode' => array( /* postal code */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'CiAdrRegion' => array( /* province/state */ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'CiUrlWork' => array( /* url. Multiple may be separated by comma. 
*/ + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + /* End contact info struct properties */ + ), + 'http://iptc.org/std/Iptc4xmpExt/2008-02-29/' => array( + 'Event' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + ), + 'OrganisationInImageName' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + 'map_name' => 'OrganisationInImage' + ), + 'PersonInImage' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_BAG, + ), + 'MaxAvailHeight' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + 'map_name' => 'OriginalImageHeight', + ), + 'MaxAvailWidth' => array( + 'map_group' => 'general', + 'mode' => XMPReader::MODE_SIMPLE, + 'validate' => 'validateInteger', + 'map_name' => 'OriginalImageWidth', + ), + // LocationShown and LocationCreated are handled + // specially because they are hierarchical, but we + // also want to merge with the old non-hierarchical. 
+ 'LocationShown' => array( + 'map_group' => 'special', + 'mode' => XMPReader::MODE_BAGSTRUCT, + 'children' => array( + 'WorldRegion' => true, + 'CountryCode' => true, /* iso code */ + 'CountryName' => true, + 'ProvinceState' => true, + 'City' => true, + 'Sublocation' => true, + ), + ), + 'LocationCreated' => array( + 'map_group' => 'special', + 'mode' => XMPReader::MODE_BAGSTRUCT, + 'children' => array( + 'WorldRegion' => true, + 'CountryCode' => true, /* iso code */ + 'CountryName' => true, + 'ProvinceState' => true, + 'City' => true, + 'Sublocation' => true, + ), + ), + 'WorldRegion' => array( + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'CountryCode' => array( + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'CountryName' => array( + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + 'map_name' => 'Country', + ), + 'ProvinceState' => array( + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + 'map_name' => 'ProvinceOrState', + ), + 'City' => array( + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + 'Sublocation' => array( + 'map_group' => 'special', + 'mode' => XMPReader::MODE_SIMPLE, + 'structPart'=> true, + ), + + /* Other props that might be interesting but + * Not currently extracted: + * ArtworkOrObject, (info about objects in picture) + * DigitalSourceType + * RegistryId + */ + ), + + /* Plus props we might want to consider: + * (Note: some of these have unclear/incomplete definitions + * from the iptc4xmp standard). + * ImageSupplier (kind of like iptc source field) + * ImageSupplierId (id code for image from supplier) + * CopyrightOwner + * ImageCreator + * Licensor + * Various model release fields + * Property release fields. 
+ */ + ); +} diff --git a/includes/media/XMPValidate.php b/includes/media/XMPValidate.php new file mode 100644 index 00000000..0f1d375c --- /dev/null +++ b/includes/media/XMPValidate.php @@ -0,0 +1,323 @@ +<?php +/** +* This contains some static methods for +* validating XMP properties. See XMPInfo and XMPReader classes. +* +* Each of these functions takes the same parameters +* * an info array which is a subset of the XMPInfo::items array +* * A value (passed as reference) to validate. This can be either a +* simple value or an array +* * A boolean to determine if this is validating a simple or complex value +* +* It should be noted that when an array is being validated, typically the validation +* function is called once for each value, and then once at the end for the entire array. +* +* These validation functions can also be used to modify the data. See the gps and flash ones +* for example. +* +* @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart1.pdf starting at pg 28 +* @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart2.pdf starting at pg 11 +*/ +class XMPValidate { + /** + * function to validate boolean properties ( True or False ) + * + * @param $info Array information about current property + * @param &$val Mixed current value to validate + * @param $standalone Boolean if this is a simple property or array + */ + public static function validateBoolean( $info, &$val, $standalone ) { + if ( !$standalone ) { + // this only validates standalone properties, not arrays, etc + return; + } + if ( $val !== 'True' && $val !== 'False' ) { + wfDebugLog( 'XMP', __METHOD__ . 
" Expected True or False but got $val" ); + $val = null; + } + + } + + /** + * function to validate rational properties ( 12/10 ) + * + * @param $info Array information about current property + * @param &$val Mixed current value to validate + * @param $standalone Boolean if this is a simple property or array + */ + public static function validateRational( $info, &$val, $standalone ) { + if ( !$standalone ) { + // this only validates standalone properties, not arrays, etc + return; + } + if ( !preg_match( '/^(?:-?\d+)\/(?:\d+[1-9]|[1-9]\d*)$/D', $val ) ) { + wfDebugLog( 'XMP', __METHOD__ . " Expected rational but got $val" ); + $val = null; + } + + } + + /** + * function to validate rating properties -1, 0-5 + * + * if its outside of range put it into range. + * + * @see MWG spec + * @param $info Array information about current property + * @param &$val Mixed current value to validate + * @param $standalone Boolean if this is a simple property or array + */ + public static function validateRating( $info, &$val, $standalone ) { + if ( !$standalone ) { + // this only validates standalone properties, not arrays, etc + return; + } + if ( !preg_match( '/^[-+]?\d*(?:\.?\d*)$/D', $val ) + || !is_numeric($val) + ) { + wfDebugLog( 'XMP', __METHOD__ . " Expected rating but got $val" ); + $val = null; + return; + } else { + $nVal = (float) $val; + if ( $nVal < 0 ) { + // We do < 0 here instead of < -1 here, since + // the values between 0 and -1 are also illegal + // as -1 is meant as a special reject rating. + wfDebugLog( 'XMP', __METHOD__ . " Rating too low, setting to -1 (Rejected)"); + $val = '-1'; + return; + } + if ( $nVal > 5 ) { + wfDebugLog( 'XMP', __METHOD__ . 
" Rating too high, setting to 5"); + $val = '5'; + return; + } + } + } + + /** + * function to validate integers + * + * @param $info Array information about current property + * @param &$val Mixed current value to validate + * @param $standalone Boolean if this is a simple property or array + */ + public static function validateInteger( $info, &$val, $standalone ) { + if ( !$standalone ) { + // this only validates standalone properties, not arrays, etc + return; + } + if ( !preg_match( '/^[-+]?\d+$/D', $val ) ) { + wfDebugLog( 'XMP', __METHOD__ . " Expected integer but got $val" ); + $val = null; + } + + } + + /** + * function to validate properties with a fixed number of allowed + * choices. (closed choice) + * + * @param $info Array information about current property + * @param &$val Mixed current value to validate + * @param $standalone Boolean if this is a simple property or array + */ + public static function validateClosed( $info, &$val, $standalone ) { + if ( !$standalone ) { + // this only validates standalone properties, not arrays, etc + return; + } + + //check if its in a numeric range + $inRange = false; + if ( isset( $info['rangeLow'] ) + && isset( $info['rangeHigh'] ) + && is_numeric( $val ) + && ( intval( $val ) <= $info['rangeHigh'] ) + && ( intval( $val ) >= $info['rangeLow'] ) + ) { + $inRange = true; + } + + if ( !isset( $info['choices'][$val] ) && !$inRange ) { + wfDebugLog( 'XMP', __METHOD__ . 
" Expected closed choice, but got $val" ); + $val = null; + } + } + + /** + * function to validate and modify flash structure + * + * On success $val is replaced by an integer bit field built from the + * struct members: Fired in bit 0, Return shifted left by 1, Mode shifted + * left by 3, Function in bit 5 and RedEyeMode in bit 6. If any of the + * five members is missing, $val is set to null instead. + * + * @param $info Array information about current property + * @param &$val Mixed current value to validate + * @param $standalone Boolean if this is a simple property or array + */ + public static function validateFlash( $info, &$val, $standalone ) { + if ( $standalone ) { + // this only validates flash structs, not individual properties + return; + } + if ( !( isset( $val['Fired'] ) + && isset( $val['Function'] ) + && isset( $val['Mode'] ) + && isset( $val['RedEyeMode'] ) + && isset( $val['Return'] ) + ) ) { + wfDebugLog( 'XMP', __METHOD__ . " Flash structure did not have all the required components" ); + $val = null; + } else { + // Seed with integer 0, not the string "\0": a non-numeric string + // mixed with bool/int operands in a bitwise OR raises a warning on + // PHP 7.1+ and a TypeError on PHP 8. The resulting integer is unchanged. + $val = ( 0 | ( $val['Fired'] === 'True' ) + | ( intval( $val['Return'] ) << 1 ) + | ( intval( $val['Mode'] ) << 3 ) + | ( ( $val['Function'] === 'True' ) << 5 ) + | ( ( $val['RedEyeMode'] === 'True' ) << 6 ) ); + } + } + + /** + * function to validate LangCode properties ( en-GB, etc ) + * + * This is just a naive check to make sure it somewhat looks like a lang code. 
+ * + * @see rfc 3066 + * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart1.pdf page 30 (section 8.2.2.5) + * + * @param $info Array information about current property + * @param &$val Mixed current value to validate + * @param $standalone Boolean if this is a simple property or array + */ + public static function validateLangCode( $info, &$val, $standalone ) { + if ( !$standalone ) { + // this only validates standalone properties, not arrays, etc + return; + } + if ( !preg_match( '/^[-A-Za-z0-9]{2,}$/D', $val) ) { + //this is a rather naive check. + wfDebugLog( 'XMP', __METHOD__ . " Expected Lang code but got $val" ); + $val = null; + } + + } + + /** + * function to validate date properties, and convert to Exif format. + * + * @param $info Array information about current property + * @param &$val Mixed current value to validate. 
Converts to TS_EXIF as a side-effect.
	 *   Set to null if the value does not look like a date or has year 0000.
	 * @param $standalone Boolean if this is a simple property or array
	 */
	public static function validateDate( $info, &$val, $standalone ) {
		if ( !$standalone ) {
			// this only validates standalone properties, not arrays, etc
			return;
		}
		$res = array();
		if ( !preg_match(
			/* ahh! scary regex... */
			'/^([0-3]\d{3})(?:-([01]\d)(?:-([0-3]\d)(?:T([0-2]\d):([0-6]\d)(?::([0-6]\d)(?:\.\d+)?)?([-+]\d{2}:\d{2}|Z)?)?)?)?$/D'
			, $val, $res)
		) {
			wfDebugLog( 'XMP', __METHOD__ . " Expected date but got $val" );
			$val = null;
		} else {
			/*
			 * $res is formatted as follows:
			 * 0 -> full date.
			 * 1 -> year, 2-> month, 3-> day, 4-> hour, 5-> minute, 6->second
			 * 7-> Timezone specifier (Z or something like +12:30 )
			 * many parts are optional, some aren't. For example if you specify
			 * minute, you must specify hour, day, month, and year but not second or TZ.
			 */

			/*
			 * First of all, if year = 0000, Something is wrongish,
			 * so don't extract. This seems to happen when
			 * some programs convert between metadata formats.
			 */
			if ( $res[1] === '0000' ) {
				wfDebugLog( 'XMP', __METHOD__ . " Invalid date (year 0): $val" );
				$val = null;
				return;
			}
			// if month, etc unspecified, fill out as 01.
			$res[2] = isset( $res[2] ) ? $res[2] : '01'; // month
			$res[3] = isset( $res[3] ) ? $res[3] : '01'; // day
			if ( !isset( $res[4] ) ) { // hour
				// just have the year month day
				$val = $res[1] . ':' . $res[2] . ':' . $res[3];
				return;
			}
			// if hour is set, so is minute or regex above will fail.
			// Extra check for empty string necessary due to TZ but no second case:
			// an unmatched group that is followed by a matched one is reported
			// by preg_match as '' rather than being absent.
			$res[6] = isset( $res[6] ) && $res[6] != '' ? $res[6] : '00';

			if ( !isset( $res[7] ) || $res[7] === 'Z' ) {
				$val = $res[1] . ':' . $res[2] . ':' . $res[3]
					. ' ' . $res[4] . ':' . $res[5] . ':' . $res[6];
				return;
			}

			// do timezone processing. We've already done the case that tz = Z.

			$unix = wfTimestamp( TS_UNIX, $res[1] . $res[2] . $res[3] . $res[4] . $res[5] . $res[6] );
			// $res[7] looks like "+hh:mm" / "-hh:mm": hours at offset 1, minutes at offset 4.
			$offset = intval( substr( $res[7], 1, 2 ) ) * 60 * 60;
			$offset += intval( substr( $res[7], 4, 2 ) ) * 60;
			if ( substr( $res[7], 0, 1 ) === '-' ) {
				$offset = -$offset;
			}
			// NOTE(review): the offset is added to the timestamp rather than
			// subtracted, so "10:00+05:00" becomes 15:00 (not the UTC
			// equivalent 05:00) - confirm this is the intended conversion.
			$val = wfTimestamp( TS_EXIF, $unix + $offset );
		}

	}

	/**
	 * Validate, and more importantly translate, the XMP DMS form of
	 * GPS coordinates to the decimal form we use.
	 *
	 * Accepted forms are "DDD,MM,SSk" and "DDD,MM.mmk", where k is one of
	 * N, S, E or W. The whole value must match: surrounding text is not
	 * tolerated, since matching only a substring can silently pick up the
	 * wrong numbers (e.g. "12,30,15.5N" would be read as "30,15.5N").
	 * South and West yield negative numbers. Anything else is logged and
	 * $val is set to null.
	 *
	 * @see http://www.adobe.com/devnet/xmp/pdfs/XMPSpecificationPart2.pdf
	 *      section 1.2.7.4 on page 23
	 *
	 * @param $info Array unused (info about prop)
	 * @param &$val String GPS string in either DDD,MM,SSk or
	 *   DDD,MM.mmk form; replaced by the decimal coordinate (float),
	 *   or null if it cannot be parsed
	 * @param $standalone Boolean if its a simple prop (should always be true)
	 */
	public static function validateGPS ( $info, &$val, $standalone ) {
		if ( !$standalone ) {
			return;
		}

		$m = array();
		if ( preg_match(
			'/^(\d{1,3}),(\d{1,2}),(\d{1,2})([NWSE])$/D',
			$val, $m )
		) {
			// degrees, whole minutes, whole seconds
			$coord = intval( $m[1] );
			$coord += intval( $m[2] ) * (1/60);
			$coord += intval( $m[3] ) * (1/3600);
			$hemisphere = $m[4];
		} elseif ( preg_match(
			// the decimal separator must be a literal dot
			'/^(\d{1,3}),(\d{1,2}(?:\.\d*)?)([NWSE])$/D',
			$val, $m )
		) {
			// degrees, decimal minutes
			$coord = intval( $m[1] );
			$coord += floatval( $m[2] ) * (1/60);
			$hemisphere = $m[3];
		} else {
			wfDebugLog( 'XMP', __METHOD__
				. " Expected GPSCoordinate, but got $val." );
			$val = null;
			return;
		}

		if ( $hemisphere === 'S' || $hemisphere === 'W' ) {
			$coord = -$coord;
		}
		$val = $coord;
	}
}