From 222b01f5169f1c7e69762e0e8904c24f78f71882 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Wed, 28 Jul 2010 11:52:48 +0200 Subject: update to MediaWiki 1.16.0 --- includes/DjVuImage.php | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'includes/DjVuImage.php') diff --git a/includes/DjVuImage.php b/includes/DjVuImage.php index 8e7caf63..75df0fd5 100644 --- a/includes/DjVuImage.php +++ b/includes/DjVuImage.php @@ -224,7 +224,7 @@ class DjVuImage { * @return string */ function retrieveMetaData() { - global $wgDjvuToXML, $wgDjvuDump; + global $wgDjvuToXML, $wgDjvuDump, $wgDjvuTxt; if ( isset( $wgDjvuDump ) ) { # djvudump is faster as of version 3.5 # http://sourceforge.net/tracker/index.php?func=detail&aid=1704049&group_id=32953&atid=406583 @@ -242,6 +242,30 @@ class DjVuImage { } else { $xml = null; } + # Text layer + if ( isset( $wgDjvuTxt ) ) { + wfProfileIn( 'djvutxt' ); + $cmd = wfEscapeShellArg( $wgDjvuTxt ) . ' --detail=page ' . wfEscapeShellArg( $this->mFilename ) ; + wfDebug( __METHOD__.": $cmd\n" ); + $txt = wfShellExec( $cmd, $retval ); + wfProfileOut( 'djvutxt' ); + if( $retval == 0) { + # Get rid of invalid UTF-8, strip control characters + if( is_callable( 'iconv' ) ) { + wfSuppressWarnings(); + $txt = iconv( "UTF-8","UTF-8//IGNORE", $txt ); + wfRestoreWarnings(); + } else { + $txt = UtfNormal::cleanUp( $txt ); + } + $txt = preg_replace( "/[\013\035\037]/", "", $txt ); + $txt = htmlspecialchars($txt); + $txt = preg_replace( "/\((page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*\"([^<]*?)\"\s*|)\)/s", "<PAGE value=\"$2\" />", $txt ); + $txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n"; + $xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml ); + $xml = $xml . $txt. '</mw-djvu>' ; + } + } return $xml; } -- cgit v1.2.3-54-g00ecf