diff options
Diffstat (limited to 'includes/normal/UtfNormal.php')
-rw-r--r-- | includes/normal/UtfNormal.php | 54 |
1 files changed, 35 insertions, 19 deletions
diff --git a/includes/normal/UtfNormal.php b/includes/normal/UtfNormal.php index af3809d5..d8eac7b8 100644 --- a/includes/normal/UtfNormal.php +++ b/includes/normal/UtfNormal.php @@ -124,8 +124,9 @@ class UtfNormal { * * @param string $string a UTF-8 string * @return string a clean, shiny, normalized UTF-8 string + * @static */ - function cleanUp( $string ) { + static function cleanUp( $string ) { if( NORMALIZE_ICU ) { # We exclude a few chars that ICU would not. $string = preg_replace( @@ -153,8 +154,9 @@ class UtfNormal { * * @param string $string a valid UTF-8 string. Input is not validated. * @return string a UTF-8 string in normal form C + * @static */ - function toNFC( $string ) { + static function toNFC( $string ) { if( NORMALIZE_ICU ) return utf8_normalize( $string, UNORM_NFC ); elseif( UtfNormal::quickIsNFC( $string ) ) @@ -169,8 +171,9 @@ class UtfNormal { * * @param string $string a valid UTF-8 string. Input is not validated. * @return string a UTF-8 string in normal form D + * @static */ - function toNFD( $string ) { + static function toNFD( $string ) { if( NORMALIZE_ICU ) return utf8_normalize( $string, UNORM_NFD ); elseif( preg_match( '/[\x80-\xff]/', $string ) ) @@ -186,8 +189,9 @@ class UtfNormal { * * @param string $string a valid UTF-8 string. Input is not validated. * @return string a UTF-8 string in normal form KC + * @static */ - function toNFKC( $string ) { + static function toNFKC( $string ) { if( NORMALIZE_ICU ) return utf8_normalize( $string, UNORM_NFKC ); elseif( preg_match( '/[\x80-\xff]/', $string ) ) @@ -203,8 +207,9 @@ class UtfNormal { * * @param string $string a valid UTF-8 string. Input is not validated. * @return string a UTF-8 string in normal form KD + * @static */ - function toNFKD( $string ) { + static function toNFKD( $string ) { if( NORMALIZE_ICU ) return utf8_normalize( $string, UNORM_NFKD ); elseif( preg_match( '/[\x80-\xff]/', $string ) ) @@ -216,10 +221,10 @@ class UtfNormal { /** * Load the basic composition data if necessary * @private + * @static */ - function loadData() { - # fixme : are $utfCanonicalComp, $utfCanonicalDecomp really used? - global $utfCombiningClass, $utfCanonicalComp, $utfCanonicalDecomp; + static function loadData() { + global $utfCombiningClass; if( !isset( $utfCombiningClass ) ) { require_once( 'UtfNormalData.inc' ); } @@ -230,8 +235,9 @@ class UtfNormal { * Returns false if not or uncertain. * @param string $string a valid UTF-8 string. Input is not validated. * @return bool + * @static */ - function quickIsNFC( $string ) { + static function quickIsNFC( $string ) { # ASCII is always valid NFC! # If it's pure ASCII, let it through. if( !preg_match( '/[\x80-\xff]/', $string ) ) return true; @@ -270,8 +276,9 @@ class UtfNormal { * Returns true if the string is _definitely_ in NFC. * Returns false if not or uncertain. * @param string $string a UTF-8 string, altered on output to be valid UTF-8 safe for XML. + * @static */ - function quickIsNFCVerify( &$string ) { + static function quickIsNFCVerify( &$string ) { # Screen out some characters that eg won't be allowed in XML $string = preg_replace( '/[\x00-\x08\x0b\x0c\x0e-\x1f]/', UTF8_REPLACEMENT, $string ); @@ -321,6 +328,7 @@ class UtfNormal { # large ASCII parts can be handled much more quickly. # Don't chop up Unicode areas for punctuation, though, # that wastes energy. + $matches = array(); preg_match_all( '/([\x00-\x7f]+|[\x80-\xff][\x00-\x40\x5b-\x5f\x7b-\xff]*)/', $string, $matches ); @@ -488,8 +496,9 @@ class UtfNormal { * @param string $string * @return string * @private + * @static */ - function NFC( $string ) { + static function NFC( $string ) { return UtfNormal::fastCompose( UtfNormal::NFD( $string ) ); } @@ -497,8 +506,9 @@ class UtfNormal { * @param string $string * @return string * @private + * @static */ - function NFD( $string ) { + static function NFD( $string ) { UtfNormal::loadData(); global $utfCanonicalDecomp; return UtfNormal::fastCombiningSort( @@ -509,8 +519,9 @@ class UtfNormal { * @param string $string * @return string * @private + * @static */ - function NFKC( $string ) { + static function NFKC( $string ) { return UtfNormal::fastCompose( UtfNormal::NFKD( $string ) ); } @@ -518,8 +529,9 @@ class UtfNormal { * @param string $string * @return string * @private + * @static */ - function NFKD( $string ) { + static function NFKD( $string ) { global $utfCompatibilityDecomp; if( !isset( $utfCompatibilityDecomp ) ) { require_once( 'UtfNormalDataK.inc' ); @@ -537,8 +549,9 @@ class UtfNormal { * @param string $string Valid UTF-8 string * @param array $map hash of expanded decomposition map * @return string a UTF-8 string decomposed, not yet normalized (needs sorting) + * @static */ - function fastDecompose( $string, &$map ) { + static function fastDecompose( $string, $map ) { UtfNormal::loadData(); $len = strlen( $string ); $out = ''; @@ -597,8 +610,9 @@ class UtfNormal { * @private * @param string $string a valid, decomposed UTF-8 string. Input is not validated. * @return string a UTF-8 string with combining characters sorted in canonical order + * @static */ - function fastCombiningSort( $string ) { + static function fastCombiningSort( $string ) { UtfNormal::loadData(); global $utfCombiningClass; $len = strlen( $string ); @@ -646,8 +660,9 @@ class UtfNormal { * @private * @param string $string a valid UTF-8 string in sorted normal form D or KD. Input is not validated. * @return string a UTF-8 string with canonical precomposed characters used where possible + * @static */ - function fastCompose( $string ) { + static function fastCompose( $string ) { UtfNormal::loadData(); global $utfCanonicalComp, $utfCombiningClass; $len = strlen( $string ); @@ -778,8 +793,9 @@ class UtfNormal { * interate through a string without really doing anything of substance. * @param string $string * @return string + * @static */ - function placebo( $string ) { + static function placebo( $string ) { $len = strlen( $string ); $out = ''; for( $i = 0; $i < $len; $i++ ) { @@ -789,4 +805,4 @@ class UtfNormal { } } -?> +?>
\ No newline at end of file |