diff options
Diffstat (limited to 'includes/DjVuImage.php')
-rw-r--r-- | includes/DjVuImage.php | 26 |
1 file changed, 25 insertions, 1 deletion
diff --git a/includes/DjVuImage.php b/includes/DjVuImage.php index 8e7caf63..75df0fd5 100644 --- a/includes/DjVuImage.php +++ b/includes/DjVuImage.php @@ -224,7 +224,7 @@ class DjVuImage { * @return string */ function retrieveMetaData() { - global $wgDjvuToXML, $wgDjvuDump; + global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt; if ( isset( $wgDjvuDump ) ) { # djvudump is faster as of version 3.5 # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583 @@ -242,6 +242,30 @@ class DjVuImage { } else { $xml = null; } + # Text layer + if ( isset( $wgDjvuTxt ) ) { + wfProfileIn( 'djvutxt' ); + $cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename ) ; + wfDebug( __METHOD__.": $cmd\n" ); + $txt = wfShellExec( $cmd, $retval ); + wfProfileOut( 'djvutxt' ); + if( $retval == 0) { + # Get rid of invalid UTF-8, strip control characters + if( is_callable( 'iconv' ) ) { + wfSuppressWarnings(); + $txt = iconv( "UTF-8","UTF-8//IGNORE", $txt ); + wfRestoreWarnings(); + } else { + $txt = UtfNormal::cleanUp( $txt ); + } + $txt = preg_replace( "/[\013\035\037]/", "", $txt ); + $txt = htmlspecialchars($txt); + $txt = preg_replace( "/\((page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*\"([^<]*?)\"\s*|)\)/s", "<PAGE value=\"$2\" />", $txt ); + $txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n"; + $xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml ); + $xml = $xml . $txt. '</mw-djvu>' ; + } + } return $xml; } |