From 9db190c7e736ec8d063187d4241b59feaf7dc2d1 Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Wed, 22 Jun 2011 11:28:20 +0200 Subject: update to MediaWiki 1.17.0 --- languages/classes/LanguageEo.php | 114 ++++++++++++++++++++++++++------------- 1 file changed, 77 insertions(+), 37 deletions(-) (limited to 'languages/classes/LanguageEo.php') diff --git a/languages/classes/LanguageEo.php b/languages/classes/LanguageEo.php index 822a43f7..7ec447e7 100644 --- a/languages/classes/LanguageEo.php +++ b/languages/classes/LanguageEo.php @@ -3,63 +3,103 @@ /** Esperanto (Esperanto) * * @ingroup Language + * @author Brion Vibber */ class LanguageEo extends Language { + /** + * Wrapper for charset conversions. + * + * In most languages, this calls through to standard system iconv(), but + * for Esperanto we're also adding a special pseudo-charset to convert + * accented characters to/from the ASCII-friendly "X" surrogate coding: + * + * cx = ĉ cxx = cx + * gx = ĝ gxx = gx + * hx = ĥ hxx = hx + * jx = ĵ jxx = jx + * sx = ŝ sxx = sx + * ux = ŭ uxx = ux + * xx = x + * + * http://en.wikipedia.org/wiki/Esperanto_orthography#X-system + * http://eo.wikipedia.org/wiki/X-sistemo + * + * X-conversion is applied, in either direction, between "utf-8" and "x" charsets; + * this comes into effect when input is run through $wgRequest->getText() and the + * $wgEditEncoding is set to 'x'. + * + * In the long run, this should be moved out of here and into the client-side + * editor behavior; the original server-side translation system dates to 2002-2003 + * when many browsers with really bad Unicode support were still in use. + * + * @param string $in input character set + * @param string $out output character set + * @param string $string text to be converted + * @return string + */ function iconv( $in, $out, $string ) { - # Por multaj lingvoj, ĉi tiu nur voku la sisteman funkcion iconv() - # Ni ankaŭ konvertu X-sistemajn surogotajn - if( strcasecmp( $in, 'x' ) == 0 and strcasecmp( $out, 'utf-8' ) == 0) { - $xu = array ( - 'xx' => 'x' , 'xX' => 'x' , - 'Xx' => 'X' , 'XX' => 'X' , - "Cx" => "\xc4\x88" , "CX" => "\xc4\x88" , - "cx" => "\xc4\x89" , "cX" => "\xc4\x89" , - "Gx" => "\xc4\x9c" , "GX" => "\xc4\x9c" , - "gx" => "\xc4\x9d" , "gX" => "\xc4\x9d" , - "Hx" => "\xc4\xa4" , "HX" => "\xc4\xa4" , - "hx" => "\xc4\xa5" , "hX" => "\xc4\xa5" , - "Jx" => "\xc4\xb4" , "JX" => "\xc4\xb4" , - "jx" => "\xc4\xb5" , "jX" => "\xc4\xb5" , - "Sx" => "\xc5\x9c" , "SX" => "\xc5\x9c" , - "sx" => "\xc5\x9d" , "sX" => "\xc5\x9d" , - "Ux" => "\xc5\xac" , "UX" => "\xc5\xac" , - "ux" => "\xc5\xad" , "uX" => "\xc5\xad" - ) ; - return preg_replace ( '/([cghjsu]x?)((?:xx)*)(?!x)/ei', - 'strtr( "$1", $xu ) . strtr( "$2", $xu )', $string ); - } else if( strcasecmp( $in, 'UTF-8' ) == 0 and strcasecmp( $out, 'x' ) == 0 ) { - $ux = array ( - 'x' => 'xx' , 'X' => 'Xx' , - "\xc4\x88" => "Cx" , "\xc4\x89" => "cx" , - "\xc4\x9c" => "Gx" , "\xc4\x9d" => "gx" , - "\xc4\xa4" => "Hx" , "\xc4\xa5" => "hx" , - "\xc4\xb4" => "Jx" , "\xc4\xb5" => "jx" , - "\xc5\x9c" => "Sx" , "\xc5\x9d" => "sx" , - "\xc5\xac" => "Ux" , "\xc5\xad" => "ux" - ) ; + if ( strcasecmp( $in, 'x' ) == 0 && strcasecmp( $out, 'utf-8' ) == 0 ) { + return preg_replace_callback ( + '/([cghjsu]x?)((?:xx)*)(?!x)/i', + array( $this, 'strrtxuCallback' ), $string ); + } else if ( strcasecmp( $in, 'UTF-8' ) == 0 && strcasecmp( $out, 'x' ) == 0 ) { # Double Xs only if they follow cxapelutaj literoj. - return preg_replace( '/((?:[cghjsu]|\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]'. - '|\xc5[\x9c\x9d\xac\xad])x*)/ei', 'strtr( "$1", $ux )', $string ); + return preg_replace_callback( + '/((?:[cghjsu]|\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]|\xc5[\x9c\x9d\xac\xad])x*)/i', + array( $this, 'strrtuxCallback' ), $string ); } return parent::iconv( $in, $out, $string ); } + function strrtuxCallback( $matches ) { + static $ux = array ( + 'x' => 'xx' , 'X' => 'Xx' , + "\xc4\x88" => "Cx" , "\xc4\x89" => "cx" , + "\xc4\x9c" => "Gx" , "\xc4\x9d" => "gx" , + "\xc4\xa4" => "Hx" , "\xc4\xa5" => "hx" , + "\xc4\xb4" => "Jx" , "\xc4\xb5" => "jx" , + "\xc5\x9c" => "Sx" , "\xc5\x9d" => "sx" , + "\xc5\xac" => "Ux" , "\xc5\xad" => "ux" + ); + return strtr( $matches[1], $ux ); + } + + function strrtxuCallback( $matches ) { + static $xu = array ( + 'xx' => 'x' , 'xX' => 'x' , + 'Xx' => 'X' , 'XX' => 'X' , + "Cx" => "\xc4\x88" , "CX" => "\xc4\x88" , + "cx" => "\xc4\x89" , "cX" => "\xc4\x89" , + "Gx" => "\xc4\x9c" , "GX" => "\xc4\x9c" , + "gx" => "\xc4\x9d" , "gX" => "\xc4\x9d" , + "Hx" => "\xc4\xa4" , "HX" => "\xc4\xa4" , + "hx" => "\xc4\xa5" , "hX" => "\xc4\xa5" , + "Jx" => "\xc4\xb4" , "JX" => "\xc4\xb4" , + "jx" => "\xc4\xb5" , "jX" => "\xc4\xb5" , + "Sx" => "\xc5\x9c" , "SX" => "\xc5\x9c" , + "sx" => "\xc5\x9d" , "sX" => "\xc5\x9d" , + "Ux" => "\xc5\xac" , "UX" => "\xc5\xac" , + "ux" => "\xc5\xad" , "uX" => "\xc5\xad" + ); + return strtr( $matches[1], $xu ) . strtr( $matches[2], $xu ); + } + function checkTitleEncoding( $s ) { # Check for X-system backwards-compatibility URLs - $ishigh = preg_match( '/[\x80-\xff]/', $s); + $ishigh = preg_match( '/[\x80-\xff]/', $s ); $isutf = preg_match( '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' . '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})+$/', $s ); - if($ishigh and !$isutf) { + if ( $ishigh and !$isutf ) { # Assume Latin1 $s = utf8_encode( $s ); } else { - if( preg_match( '/(\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]'. + if ( preg_match( '/(\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]' . '|\xc5[\x9c\x9d\xac\xad])/', $s ) ) return $s; } - //if( preg_match( '/[cghjsu]x/i', $s ) ) + // if( preg_match( '/[cghjsu]x/i', $s ) ) // return $this->iconv( 'x', 'utf-8', $s ); return $s; } -- cgit v1.2.3-54-g00ecf