diff options
Diffstat (limited to 'includes/normal')
-rw-r--r-- | includes/normal/README | 10 | ||||
-rw-r--r-- | includes/normal/RandomTest.php | 6 | ||||
-rw-r--r-- | includes/normal/UtfNormal.php | 4 | ||||
-rw-r--r-- | includes/normal/UtfNormalTest.php | 8 | ||||
-rw-r--r-- | includes/normal/UtfNormalTest2.php | 2 | ||||
-rw-r--r-- | includes/normal/UtfNormalUtil.php | 8 |
6 files changed, 18 insertions, 20 deletions
diff --git a/includes/normal/README b/includes/normal/README index a17aa7da..0f718d2c 100644 --- a/includes/normal/README +++ b/includes/normal/README @@ -48,12 +48,12 @@ grains of salt. There's an experimental PHP extension module which wraps the ICU library's normalization functions. This is *MUCH* faster than doing this work in pure -PHP code. This is in the 'normal' directory in MediaWiki's CVS extensions -module. It is known to work with PHP 4.3.8 and 5.0.2 on Linux/x86 but hasn't -been thoroughly tested on other configurations. +PHP code. This is at https://git.wikimedia.org/summary/mediawiki%2Fextensions%2Fnormal.git. +It is used by the WMF, which currently runs PHP 5.3.10 on Linux. It hasn't been +thoroughly tested on other configurations, but may work. If the php_normal.so module is loaded in php.ini, the normalization functions will automatically use it. If you can't (or don't want to) load it in php.ini, -you may be able to load it using the dl() function before include()ing or -require()ing UtfNormal.php, and it will be picked up. +you may be able to load it using the dl() function before the inclusion of +UtfNormal.php, and it will be picked up. diff --git a/includes/normal/RandomTest.php b/includes/normal/RandomTest.php index 9dc1c861..06029868 100644 --- a/includes/normal/RandomTest.php +++ b/includes/normal/RandomTest.php @@ -31,10 +31,10 @@ if( PHP_SAPI != 'cli' ) { } /** */ -require_once( 'UtfNormal.php' ); -require_once( '../diff/DifferenceEngine.php' ); +require_once 'UtfNormal.php'; +require_once '../diff/DifferenceEngine.php'; -dl('php_utfnormal.so' ); +dl( 'php_utfnormal.so' ); # mt_srand( 99999 ); diff --git a/includes/normal/UtfNormal.php b/includes/normal/UtfNormal.php index 77ddb79b..5a091afc 100644 --- a/includes/normal/UtfNormal.php +++ b/includes/normal/UtfNormal.php @@ -190,7 +190,7 @@ class UtfNormal { */ static function loadData() { if( !isset( self::$utfCombiningClass ) ) { - require_once( __DIR__ . '/UtfNormalData.inc' ); + require_once __DIR__ . '/UtfNormalData.inc'; } } @@ -491,7 +491,7 @@ class UtfNormal { */ static function NFKD( $string ) { if( !isset( self::$utfCompatibilityDecomp ) ) { - require_once( 'UtfNormalDataK.inc' ); + require_once 'UtfNormalDataK.inc'; } return self::fastCombiningSort( self::fastDecompose( $string, self::$utfCompatibilityDecomp ) ); diff --git a/includes/normal/UtfNormalTest.php b/includes/normal/UtfNormalTest.php index 661e53fd..51183666 100644 --- a/includes/normal/UtfNormalTest.php +++ b/includes/normal/UtfNormalTest.php @@ -34,9 +34,7 @@ $verbose = true; if( defined( 'PRETTY_UTF8' ) ) { function pretty( $string ) { - return preg_replace( '/([\x00-\xff])/e', - 'sprintf("%02X", ord("$1"))', - $string ); + return strtoupper( bin2hex( $string ) ); } } else { /** @@ -44,9 +42,7 @@ if( defined( 'PRETTY_UTF8' ) ) { * @return string */ function pretty( $string ) { - return trim( preg_replace( '/(.)/use', - 'sprintf("%04X ", utf8ToCodepoint("$1"))', - $string ) ); + return strtoupper( utf8ToHexSequence( $string ) ); } } diff --git a/includes/normal/UtfNormalTest2.php b/includes/normal/UtfNormalTest2.php index 2266696e..750c0099 100644 --- a/includes/normal/UtfNormalTest2.php +++ b/includes/normal/UtfNormalTest2.php @@ -65,7 +65,7 @@ $f = fopen($file, "r"); later and slow down the runtime. */ -require_once("./UtfNormal.php"); +require_once './UtfNormal.php'; function normalize_form_c($c) { return UtfNormal::toNFC($c); } function normalize_form_d($c) { return UtfNormal::toNFD($c); } function normalize_form_kc($c) { return UtfNormal::toNFKC($c); } diff --git a/includes/normal/UtfNormalUtil.php b/includes/normal/UtfNormalUtil.php index 9b96a073..e8fec936 100644 --- a/includes/normal/UtfNormalUtil.php +++ b/includes/normal/UtfNormalUtil.php @@ -76,9 +76,11 @@ function hexSequenceToUtf8( $sequence ) { * @private */ function utf8ToHexSequence( $str ) { - return rtrim( preg_replace( '/(.)/uSe', - 'sprintf("%04x ", utf8ToCodepoint("$1"))', - $str ) ); + $buf = ''; + foreach ( preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY ) as $cp ) { + $buf .= sprintf( '%04x ', utf8ToCodepoint( $cp ) ); + } + return rtrim( $buf ); } /** |