summary | refs | log | tree | commit | diff
path: root/extensions/PdfHandler/PdfHandler.image.php
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@sbcglobal.net> 2015-02-25 23:38:25 -0500
committer Luke Shumaker <lukeshu@sbcglobal.net> 2015-02-25 23:38:25 -0500
commit b0e5922cdadff2b394100dc8977bc2d526c04595 (patch)
tree f1c19b1aaf0988cdef72f978b9f16c5d631d3727 /extensions/PdfHandler/PdfHandler.image.php
parent ad2b9dc3e492af9d550532817f34f865a97a8f63 (diff)
parent b88ab0086858470dd1f644e64cb4e4f62bb2be9b (diff)
Merge commit 'b88ab'
Diffstat (limited to 'extensions/PdfHandler/PdfHandler.image.php')
-rw-r--r-- extensions/PdfHandler/PdfHandler.image.php 309
1 files changed, 0 insertions, 309 deletions
diff --git a/extensions/PdfHandler/PdfHandler.image.php b/extensions/PdfHandler/PdfHandler.image.php
deleted file mode 100644
index 49da7f4e..00000000
--- a/extensions/PdfHandler/PdfHandler.image.php
+++ /dev/null
@@ -1,309 +0,0 @@
-<?php
-/**
- *
- * Copyright © 2007 Xarax <jodeldi@gmx.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- */
-
-/**
- * inspired by djvuimage from Brion Vibber
- * modified and written by xarax
- */
-
-class PdfImage {
-
-    /**
-     * Path of the PDF file handed to the external tools.
-     *
-     * Declared explicitly instead of being created on the fly by the
-     * constructor; public, as the implicitly created property was.
-     *
-     * @var string
-     */
-    public $mFilename;
-
-    /**
-     * @param $filename string Path of the PDF file to inspect
-     */
-    public function __construct( $filename ) {
-        $this->mFilename = $filename;
-    }
-
-    /**
-     * Performs no inspection of the file; always reports true.
-     *
-     * @return bool
-     */
-    public function isValid() {
-        return true;
-    }
-
-    /**
-     * Dimensions of page 1, derived from freshly retrieved metadata.
-     *
-     * @return array|bool array( width, height, 'Pdf', 'width="W" height="H"' ),
-     *  or false when getPageSize() finds no usable page size
-     */
-    public function getImageSize() {
-        $data = $this->retrieveMetaData();
-        $size = self::getPageSize( $data, 1 );
-        if ( !$size ) {
-            return false;
-        }
-
-        $width = $size['width'];
-        $height = $size['height'];
-        return array( $width, $height, 'Pdf',
-            "width=\"$width\" height=\"$height\"" );
-    }
-
-    /**
-     * Compute the dimensions of one page from parsed pdfinfo data.
-     *
-     * A per-page entry under $data['pages'][$page] takes precedence over the
-     * document-wide 'Page size' / 'Page rot' keys. Both numbers of a
-     * "<width> x <height> ..." value are divided by 72 and multiplied by
-     * $wgPdfHandlerDpi.
-     *
-     * @param $data array|bool|null Metadata as built by retrieveMetaData()
-     * @param $page int Page number, used as key into $data['pages']
-     * @return array|bool array( 'width' => int, 'height' => int ), or false
-     *  when no page size is recorded or it lacks the "x" separator
-     */
-    public static function getPageSize( $data, $page ) {
-        global $wgPdfHandlerDpi;
-
-        if ( isset( $data['pages'][$page]['Page size'] ) ) {
-            $pageSize = $data['pages'][$page]['Page size'];
-        } elseif ( isset( $data['Page size'] ) ) {
-            $pageSize = $data['Page size'];
-        } else {
-            return false;
-        }
-
-        // explode() never returns an empty array, so only the element count
-        // can tell whether both a width and a height are present.
-        $parts = explode( 'x', $pageSize, 2 );
-        if ( count( $parts ) < 2 ) {
-            return false;
-        }
-
-        if ( isset( $data['pages'][$page]['Page rot'] ) ) {
-            $rotation = $data['pages'][$page]['Page rot'];
-        } elseif ( isset( $data['Page rot'] ) ) {
-            $rotation = $data['Page rot'];
-        } else {
-            $rotation = 0;
-        }
-
-        // floatval() so that stray non-numeric text cannot break the arithmetic
-        $width = intval( floatval( trim( $parts[0] ) ) / 72 * $wgPdfHandlerDpi );
-        $heightBits = explode( ' ', trim( $parts[1] ), 2 );
-        $height = intval( floatval( $heightBits[0] ) / 72 * $wgPdfHandlerDpi );
-
-        // Truncate before the bit test; an odd multiple of 90 selects the swap
-        if ( intval( floatval( $rotation ) / 90 ) & 1 ) {
-            // Swap width and height for landscape pages
-            return array( 'width' => $height, 'height' => $width );
-        }
-
-        return array( 'width' => $width, 'height' => $height );
-    }
-
-    /**
-     * Run the configured external tools on the file and collect their output.
-     *
-     * If $wgPdfInfo is set, its output is parsed by convertDumpToArray(). If
-     * $wgPdftoText is set and wfShellExec() reports a return value of 0, the
-     * output is split on form feeds and stored, one string per page, under
-     * 'text'.
-     *
-     * @return array|bool|null null when pdfinfo is not configured and no text
-     *  was read, false when pdfinfo printed nothing and no text was read,
-     *  otherwise the metadata array
-     */
-    public function retrieveMetaData() {
-        global $wgPdfInfo, $wgPdftoText;
-
-        $data = null;
-        if ( $wgPdfInfo ) {
-            wfProfileIn( 'pdfinfo' );
-            $cmd = wfEscapeShellArg( $wgPdfInfo ) .
-                " -enc UTF-8 " . # Report metadata as UTF-8 text...
-                " -l 9999999 " . # Report page sizes for all pages
-                " -meta " . # Report XMP metadata
-                wfEscapeShellArg( $this->mFilename );
-            $retval = '';
-            $dump = wfShellExec( $cmd, $retval );
-            $data = $this->convertDumpToArray( $dump );
-            wfProfileOut( 'pdfinfo' );
-        }
-
-        # Read text layer. Checked for truthiness, like $wgPdfInfo above, so a
-        # tool switched off with false or '' is skipped rather than executed.
-        if ( $wgPdftoText ) {
-            wfProfileIn( 'pdftotext' );
-            $cmd = wfEscapeShellArg( $wgPdftoText ) . ' ' .
-                wfEscapeShellArg( $this->mFilename ) . ' - ';
-            wfDebug( __METHOD__ . ": $cmd\n" );
-            $retval = '';
-            $txt = wfShellExec( $cmd, $retval );
-            wfProfileOut( 'pdftotext' );
-            if ( $retval == 0 ) {
-                $txt = str_replace( "\r\n", "\n", $txt );
-                $pages = explode( "\f", $txt );
-                foreach ( $pages as $page => $pageText ) {
-                    # Get rid of invalid UTF-8, strip control characters
-                    # Note we need to do this per page, as \f page feed would be stripped.
-                    $pages[$page] = UtfNormal::cleanUp( $pageText );
-                }
-                if ( !is_array( $data ) ) {
-                    # Nothing from pdfinfo (null or false): start from an empty
-                    # array instead of relying on implicit conversion.
-                    $data = array();
-                }
-                $data['text'] = $pages;
-            }
-        }
-        return $data;
-    }
-
-    /**
-     * Convert the text printed by pdfinfo into an array.
-     *
-     * Each line is split at its first colon into key and value. Keys of the
-     * form "Page <n> size" / "Page <n> rot" are stored under
-     * $data['pages'][<n>]; every line after "Metadata:" is appended to 'xmp'.
-     * The collected array is then handed to postProcessDump().
-     *
-     * @param $dump string
-     * @return array|bool false when $dump is empty
-     */
-    protected function convertDumpToArray( $dump ) {
-        if ( strval( $dump ) === '' ) {
-            return false;
-        }
-
-        $lines = explode( "\n", $dump );
-        $data = array();
-
-        // Metadata is always the last item, and spans multiple lines.
-        $inMetadata = false;
-
-        foreach ( $lines as $line ) {
-            if ( $inMetadata ) {
-                # Handle XMP differently due to difference in line break
-                $data['xmp'] .= "\n$line";
-                continue;
-            }
-            $bits = explode( ':', $line, 2 );
-            if ( count( $bits ) < 2 ) {
-                // No colon, so not a "key: value" line
-                continue;
-            }
-            $key = trim( $bits[0] );
-            if ( $key === 'Metadata' ) {
-                $inMetadata = true;
-                $data['xmp'] = '';
-                continue;
-            }
-            $value = trim( $bits[1] );
-            $matches = array();
-            // "Page xx rot" will be in poppler 0.20's pdfinfo output
-            // See https://bugs.freedesktop.org/show_bug.cgi?id=41867
-            if ( preg_match( '/^Page +(\d+) (size|rot)$/', $key, $matches ) ) {
-                $field = $matches[2] == 'size' ? 'Page size' : 'Page rot';
-                $data['pages'][$matches[1]][$field] = $value;
-            } else {
-                $data[$key] = $value;
-            }
-        }
-        return $this->postProcessDump( $data );
-    }
-
-    /**
-     * Postprocess the metadata (convert xmp into useful form, etc)
-     *
-     * This is used to generate the metadata table at the bottom
-     * of the image description page.
-     *
-     * @param $data Array metadata
-     * @return Array post-processed metadata: 'xmp' is removed and the
-     *  combined result is added under 'mergedMetadata'
-     */
-    protected function postProcessDump( array $data ) {
-        $meta = new BitmapMetadataHandler();
-        $items = array();
-        foreach ( $data as $key => $val ) {
-            switch ( $key ) {
-                case 'Title':
-                    $items['ObjectName'] = $val;
-                    break;
-                case 'Subject':
-                    $items['ImageDescription'] = $val;
-                    break;
-                case 'Keywords':
-                    // Sometimes we have empty keywords. This seems
-                    // to be a product of how pdfinfo deals with keywords
-                    // with spaces in them. Filter such empty keywords
-                    $keyList = array_filter( explode( ' ', $val ) );
-                    if ( count( $keyList ) > 0 ) {
-                        $items['Keywords'] = $keyList;
-                    }
-                    break;
-                case 'Author':
-                    $items['Artist'] = $val;
-                    break;
-                case 'Creator':
-                    // Program used to create file.
-                    // Different from program used to convert to pdf.
-                    $items['Software'] = $val;
-                    break;
-                case 'Producer':
-                    // Conversion program
-                    $items['pdf-Producer'] = $val;
-                    break;
-                case 'ModTime':
-                    $timestamp = wfTimestamp( TS_EXIF, $val );
-                    if ( $timestamp ) {
-                        // 'if' is just paranoia
-                        $items['DateTime'] = $timestamp;
-                    }
-                    break;
-                case 'CreationTime':
-                    $timestamp = wfTimestamp( TS_EXIF, $val );
-                    if ( $timestamp ) {
-                        $items['DateTimeDigitized'] = $timestamp;
-                    }
-                    break;
-                // These last two (version and encryption) I was unsure
-                // if we should include in the table, since they aren't
-                // all that useful to editors. I leaned on the side
-                // of including. However not including if file
-                // is optimized/linearized since that is really useless
-                // to an editor.
-                case 'PDF version':
-                    $items['pdf-Version'] = $val;
-                    break;
-                case 'Encrypted':
-                    // @todo: The value isn't i18n-ised. The appropriate
-                    // place to do that is in FormatMetadata.php;
-                    // a hook should be added there.
-                    // For reference, if encrypted this field's value looks like:
-                    // "yes (print:yes copy:no change:no addNotes:no)"
-                    $items['pdf-Encrypted'] = $val;
-                    break;
-                // Note 'pages' and 'Pages' are different keys (!)
-                case 'pages':
-                    // A pdf document can have multiple sized pages in it.
-                    // (However 95% of the time, all pages are the same size)
-                    // get a list of all the unique page sizes in document.
-                    // This doesn't do anything with rotation as of yet,
-                    // mostly because I am unsure of what a good way to
-                    // present that information to the user would be.
-                    $pageSizes = array();
-                    foreach ( $val as $page ) {
-                        if ( isset( $page['Page size'] ) ) {
-                            $pageSizes[ $page['Page size'] ] = true;
-                        }
-                    }
-
-                    $pageSizeArray = array_keys( $pageSizes );
-                    if ( count( $pageSizeArray ) > 0 ) {
-                        $items['pdf-PageSize'] = $pageSizeArray;
-                    }
-                    break;
-            }
-        }
-        $meta->addMetadata( $items, 'native' );
-
-        if ( isset( $data['xmp'] ) && function_exists( 'xml_parser_create_ns' ) ) {
-            // func exists verifies that the xml extension required for XMPReader
-            // is present (Almost always is present)
-            // @todo: This only handles generic xmp properties. Would be improved
-            // by handling pdf xmp properties (pdf and pdfx) via XMPInfo hook.
-            $xmp = new XMPReader();
-            $xmp->parse( $data['xmp'] );
-            $xmpRes = $xmp->getResults();
-            foreach ( $xmpRes as $type => $xmpSection ) {
-                $meta->addMetadata( $xmpSection, $type );
-            }
-        }
-        unset( $data['xmp'] );
-        $data['mergedMetadata'] = $meta->getMetadataArray();
-        return $data;
-    }
-}