diff options
Diffstat (limited to 'languages/LanguageUtf8.php')
-rw-r--r-- | languages/LanguageUtf8.php | 199 |
1 files changed, 0 insertions, 199 deletions
<?php
/**
 * UTF-8 support shared by all UTF-8 based language files.
 *
 * Including this file sets the input/output encoding globals to UTF-8 and,
 * when the mbstring extension is unavailable, loads the case-mapping tables
 * used by the fallback case-conversion routines in LanguageUtf8.
 *
 * @package MediaWiki
 * @subpackage Language
 */

if( defined( "MEDIAWIKI" ) ) {

# This file and LanguageLatin1.php may be included from within functions, so
# we need to have global statements

global $wgInputEncoding, $wgOutputEncoding, $wikiUpperChars, $wikiLowerChars;
global $wgDBname, $wgMemc;

$wgInputEncoding = "UTF-8";
$wgOutputEncoding = "UTF-8";

if( function_exists( 'mb_strtoupper' ) ) {
	mb_internal_encoding('UTF-8');
} else {
	# Hack our own case conversion routines

	# Loading serialized arrays is faster than parsing code :P
	$wikiUpperChars = $wgMemc->get( $key1 = "$wgDBname:utf8:upper" );
	$wikiLowerChars = $wgMemc->get( $key2 = "$wgDBname:utf8:lower" );

	if(empty( $wikiUpperChars) || empty($wikiLowerChars )) {
		# Cache miss: Utf8Case.php is presumably what populates both
		# tables (TODO confirm); store them for the next request.
		require_once( "includes/Utf8Case.php" );
		$wgMemc->set( $key1, $wikiUpperChars );
		$wgMemc->set( $key2, $wikiLowerChars );
	}
}

/**
 * Base stuff useful to all UTF-8 based language files
 *
 * Method signatures are deliberately left untyped and without visibility
 * keywords so they stay compatible with the parent Language class, which is
 * declared elsewhere.
 *
 * @package MediaWiki
 */
class LanguageUtf8 extends Language {

	# These functions use mbstring library, if it is loaded
	# or compiled and character mapping arrays otherwise.
	# In case of language-specific character mismatch
	# it should be dealt with in Language classes.

	/**
	 * Uppercase the first character of a UTF-8 string.
	 *
	 * @param string $str
	 * @return string
	 */
	function ucfirst( $str ) {
		return LanguageUtf8::uc( $str, true );
	}

	/**
	 * Uppercase a UTF-8 string, or only its first character.
	 *
	 * @param string $str
	 * @param bool $first When true, only the first character is converted
	 * @return string
	 */
	function uc( $str, $first = false ) {
		if ( function_exists( 'mb_strtoupper' ) ) {
			if ( $first ) {
				if ( LanguageUtf8::isMultibyte( $str ) ) {
					return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
				} else {
					return ucfirst( $str );
				}
			} else {
				return LanguageUtf8::isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
			}
		} else {
			if ( LanguageUtf8::isMultibyte( $str ) ) {
				global $wikiUpperChars;
				# Anchor the pattern so only the leading character is mapped
				$x = $first ? '^' : '';
				# preg_replace_callback replaces the former /e (eval) modifier,
				# which is no longer supported by PCRE in current PHP.
				return preg_replace_callback(
					"/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
					function ( $m ) use ( $wikiUpperChars ) {
						return strtr( $m[1], $wikiUpperChars );
					},
					$str
				);
			} else {
				return $first ? ucfirst( $str ) : strtoupper( $str );
			}
		}
	}

	/**
	 * Lowercase the first character of a UTF-8 string.
	 *
	 * @param string $str
	 * @return string
	 */
	function lcfirst( $str ) {
		return LanguageUtf8::lc( $str, true );
	}

	/**
	 * Lowercase a UTF-8 string, or only its first character.
	 *
	 * @param string $str
	 * @param bool $first When true, only the first character is converted
	 * @return string
	 */
	function lc( $str, $first = false ) {
		if ( function_exists( 'mb_strtolower' ) ) {
			if ( $first ) {
				if ( LanguageUtf8::isMultibyte( $str ) ) {
					return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
				} else {
					return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
				}
			} else {
				return LanguageUtf8::isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
			}
		} else {
			if ( LanguageUtf8::isMultibyte( $str ) ) {
				global $wikiLowerChars;
				# Anchor the pattern so only the leading character is mapped
				$x = $first ? '^' : '';
				# preg_replace_callback replaces the former /e (eval) modifier
				return preg_replace_callback(
					"/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
					function ( $m ) use ( $wikiLowerChars ) {
						return strtr( $m[1], $wikiLowerChars );
					},
					$str
				);
			} else {
				return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
			}
		}
	}

	/**
	 * Tell whether a string contains any non-ASCII (high-bit) byte.
	 *
	 * The pattern is intentionally not anchored: a multibyte character can
	 * appear anywhere in the string, and whole-string case conversion must
	 * take the multibyte-aware path in that case too.
	 *
	 * @param string $str
	 * @return bool
	 */
	function isMultibyte( $str ) {
		return (bool)preg_match( '/[\x80-\xff]/', $str );
	}

	/**
	 * Fold case and hex-encode multibyte sequences for the search index.
	 *
	 * Each sequence that starts with a byte in \xc0-\xff is replaced by
	 * 'U8' followed by the hex dump of its (lowercased) bytes.
	 *
	 * @param string $string
	 * @return string
	 */
	function stripForSearch( $string ) {
		# MySQL fulltext index doesn't grok utf-8, so we
		# need to fold cases and convert to hex

		# In Language:: it just returns lowercase, maybe
		# all strtolower on stripped output or argument
		# should be removed and all stripForSearch
		# methods adjusted to that.

		wfProfileIn( "LanguageUtf8::stripForSearch" );
		if( function_exists( 'mb_strtolower' ) ) {
			$out = preg_replace_callback(
				"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
				function ( $m ) {
					return 'U8' . bin2hex( $m[1] );
				},
				mb_strtolower( $string ) );
		} else {
			global $wikiLowerChars;
			# Only the matched multibyte sequences are lowercased here;
			# ASCII characters pass through this branch unchanged.
			$out = preg_replace_callback(
				"/([\\xc0-\\xff][\\x80-\\xbf]*)/",
				function ( $m ) use ( $wikiLowerChars ) {
					return 'U8' . bin2hex( strtr( $m[1], $wikiLowerChars ) );
				},
				$string );
		}
		wfProfileOut( "LanguageUtf8::stripForSearch" );
		return $out;
	}

	/**
	 * Name of the 8-bit encoding assumed for input that is not valid UTF-8.
	 *
	 * @return string
	 */
	function fallback8bitEncoding() {
		# Windows codepage 1252 is a superset of iso 8859-1
		# override this to use a different source encoding to
		# translate incoming 8-bit URLs.
		return "windows-1252";
	}

	/**
	 * Return $s unchanged when it is pure ASCII or matches the UTF-8
	 * pattern below; otherwise convert it from the fallback 8-bit encoding.
	 *
	 * @param string $s
	 * @return string
	 */
	function checkTitleEncoding( $s ) {
		if( is_array( $s ) ) {
			wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
		}
		# Check for non-UTF-8 URLs
		$ishigh = preg_match( '/[\x80-\xff]/', $s);
		if(!$ishigh) return $s;

		$isutf8 = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s );
		if( $isutf8 ) return $s;

		return $this->iconv( $this->fallback8bitEncoding(), "utf-8", $s );
	}

	/**
	 * Return the first UTF-8 character (1 to 4 bytes) of a string.
	 *
	 * @param string $s
	 * @return string The leading character, or "" when the string does not
	 *                start with a sequence matching the pattern
	 */
	function firstChar( $s ) {
		preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/', $s, $matches);

		return isset( $matches[1] ) ? $matches[1] : "";
	}

	# Crop a string from the beginning or end to a certain number of bytes.
	# (Bytes are used because our storage has limited byte lengths for some
	# columns in the database.) Multibyte charsets will need to make sure that
	# only whole characters are included!
	#
	# $length does not include the optional ellipsis.
	# If $length is negative, snip from the beginning
	/**
	 * @param string $string
	 * @param int $length Maximum number of bytes to keep; negative keeps the
	 *                    tail of the string instead of the head
	 * @param string $ellipsis Appended (or prepended for negative $length)
	 *                         when the string was actually cropped
	 * @return string
	 */
	function truncate( $string, $length, $ellipsis = "" ) {
		if( $length == 0 ) {
			return $ellipsis;
		}
		if ( strlen( $string ) <= abs( $length ) ) {
			return $string;
		}
		if( $length > 0 ) {
			$string = substr( $string, 0, $length );
			$char = ord( $string[strlen( $string ) - 1] );
			if ($char >= 0xc0) {
				# We got the first byte only of a multibyte char; remove it.
				$string = substr( $string, 0, -1 );
			} elseif( $char >= 0x80 &&
				# The s modifier lets (.*) span newlines; without it a string
				# containing a line break never matches and the partial
				# character is left in place.
				preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
					'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $m ) ) {
				# We chopped in the middle of a character; remove it
				$string = $m[1];
			}
			return $string . $ellipsis;
		} else {
			$string = substr( $string, $length );
			$char = ord( $string[0] );
			if( $char >= 0x80 && $char < 0xc0 ) {
				# We chopped in the middle of a character; remove the whole thing
				$string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
			}
			return $ellipsis . $string;
		}
	}
}

} # ifdef MEDIAWIKI

?>