diff options
Diffstat (limited to 'languages/classes')
38 files changed, 515 insertions, 133 deletions
diff --git a/languages/classes/LanguageAm.php b/languages/classes/LanguageAm.php new file mode 100644 index 00000000..cf31c709 --- /dev/null +++ b/languages/classes/LanguageAm.php @@ -0,0 +1,16 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageAm extends Language { + /** + * Use singular form for zero + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 2 ); + + return ($count <= 1) ? $forms[0] : $forms[1]; + } +} diff --git a/languages/classes/LanguageAr.php b/languages/classes/LanguageAr.php index 01c921a0..70a53f86 100644 --- a/languages/classes/LanguageAr.php +++ b/languages/classes/LanguageAr.php @@ -6,22 +6,39 @@ * @author Niklas Laxström */ class LanguageAr extends Language { - function convertPlural( $count, $forms ) { if ( !count($forms) ) { return ''; } - $forms = $this->preConvertPlural( $forms, 5 ); + $forms = $this->preConvertPlural( $forms, 6 ); - if ( $count == 1 ) { + if ( $count == 0 ) { $index = 0; - } elseif( $count == 2 ) { + } elseif ( $count == 1 ) { $index = 1; - } elseif( $count < 11 && $count > 2 ) { + } elseif( $count == 2 ) { $index = 2; - } elseif( $count % 100 == 0) { + } elseif( $count % 100 >= 3 && $count % 100 <= 10 ) { $index = 3; - } else { + } elseif( $count % 100 >= 11 && $count % 100 <= 99 ) { $index = 4; + } else { + $index = 5; } return $forms[$index]; } + + /** + * Temporary hack for bug 9413: replace Arabic presentation forms with their + * standard equivalents. + * + * FIXME: This is language-specific for now only to avoid the negative + * performance impact of enabling it for all languages. 
+ */ + function normalize( $s ) { + global $wgFixArabicUnicode; + $s = parent::normalize( $s ); + if ( $wgFixArabicUnicode ) { + $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s ); + } + return $s; + } } diff --git a/languages/classes/LanguageBe.php b/languages/classes/LanguageBe.php index 1468daff..b86a5f30 100644 --- a/languages/classes/LanguageBe.php +++ b/languages/classes/LanguageBe.php @@ -7,7 +7,6 @@ * @ingroup Language * * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> - * @bug 1638, 2135 * @link http://be.wikipedia.org/wiki/Talk:LanguageBe.php * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License * @license http://www.gnu.org/copyleft/fdl.html GNU Free Documentation License @@ -17,6 +16,8 @@ class LanguageBe extends Language { function convertPlural( $count, $forms ) { if ( !count($forms) ) { return ''; } + // FIXME: CLDR defines 4 plural forms instead of 3 + // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html $forms = $this->preConvertPlural( $forms, 3 ); if ($count > 10 && floor(($count % 100) / 10) == 1) { diff --git a/languages/classes/LanguageBe_tarask.php b/languages/classes/LanguageBe_tarask.php index 358cb486..96ad9467 100644 --- a/languages/classes/LanguageBe_tarask.php +++ b/languages/classes/LanguageBe_tarask.php @@ -4,7 +4,6 @@ * @ingroup Language * * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com> - * @bug 1638, 2135 * @link http://be.wikipedia.org/wiki/Talk:LanguageBe.php * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License * @license http://www.gnu.org/copyleft/fdl.html GNU Free Documentation License @@ -25,6 +24,8 @@ class LanguageBe_tarask extends Language { //if no number with word, then use $form[0] for singular and $form[1] for plural or zero if( count($forms) === 2 ) return $count == 1 ? 
$forms[0] : $forms[1]; + // FIXME: CLDR defines 4 plural forms instead of 3 + // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html $forms = $this->preConvertPlural( $forms, 3 ); if ($count > 10 && floor(($count % 100) / 10) == 1) { diff --git a/languages/classes/LanguageBh.php b/languages/classes/LanguageBh.php new file mode 100644 index 00000000..80119ec7 --- /dev/null +++ b/languages/classes/LanguageBh.php @@ -0,0 +1,16 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageBh extends Language { + /** + * Use singular form for zero + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 2 ); + + return ($count <= 1) ? $forms[0] : $forms[1]; + } +} diff --git a/languages/classes/LanguageBs.php b/languages/classes/LanguageBs.php index 24f6adde..df522f83 100644 --- a/languages/classes/LanguageBs.php +++ b/languages/classes/LanguageBs.php @@ -10,6 +10,8 @@ class LanguageBs extends Language { if ( !count($forms) ) { return ''; } $forms = $this->preConvertPlural( $forms, 3 ); + // FIXME: CLDR defines 4 plural forms instead of 3. Plural for decimals is missing. + // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html if ($count > 10 && floor(($count % 100) / 10) == 1) { return $forms[2]; } else { diff --git a/languages/classes/LanguageCy.php b/languages/classes/LanguageCy.php index 486f7a7c..9a395393 100644 --- a/languages/classes/LanguageCy.php +++ b/languages/classes/LanguageCy.php @@ -8,6 +8,9 @@ class LanguageCy extends Language { function convertPlural( $count, $forms ) { if ( !count($forms) ) { return ''; } + + // FIXME: CLDR defines 4 plural forms; very different, actually. 
+ // See http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#cy $forms = $this->preConvertPlural( $forms, 6 ); $count = abs( $count ); if ( $count >= 0 && $count <= 3 ) { diff --git a/languages/classes/LanguageEo.php b/languages/classes/LanguageEo.php index c58d424f..822a43f7 100644 --- a/languages/classes/LanguageEo.php +++ b/languages/classes/LanguageEo.php @@ -6,7 +6,6 @@ */ class LanguageEo extends Language { function iconv( $in, $out, $string ) { - # For most languages, this is a wrapper for iconv # Por multaj lingvoj, ĉi tiu nur voku la sisteman funkcion iconv() # Ni ankaŭ konvertu X-sistemajn surogotajn if( strcasecmp( $in, 'x' ) == 0 and strcasecmp( $out, 'utf-8' ) == 0) { @@ -42,7 +41,7 @@ class LanguageEo extends Language { return preg_replace( '/((?:[cghjsu]|\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]'. '|\xc5[\x9c\x9d\xac\xad])x*)/ei', 'strtr( "$1", $ux )', $string ); } - return iconv( $in, $out, $string ); + return parent::iconv( $in, $out, $string ); } function checkTitleEncoding( $s ) { diff --git a/languages/classes/LanguageGa.php b/languages/classes/LanguageGa.php index 2fdeed04..344fdc84 100644 --- a/languages/classes/LanguageGa.php +++ b/languages/classes/LanguageGa.php @@ -34,4 +34,20 @@ class LanguageGa extends Language { } return $word; } + + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + + // plural forms per http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ga + $forms = $this->preConvertPlural( $forms, 3 ); + + if ( $count == 1 ) { + $index = 0; + } elseif( $count == 2 ) { + $index = 1; + } else { + $index = 2; + } + return $forms[$index]; + } } diff --git a/languages/classes/LanguageGan.php b/languages/classes/LanguageGan.php index f4541059..3c8b5fdb 100644 --- a/languages/classes/LanguageGan.php +++ b/languages/classes/LanguageGan.php @@ -11,7 +11,6 @@ class GanConverter extends LanguageConverter { function __construct($langobj, $maincode, 
$variants=array(), $variantfallbacks=array(), - $markup=array(), $flags = array(), $manualLevel = array() ) { $this->mDescCodeSep = ':'; @@ -19,7 +18,6 @@ class GanConverter extends LanguageConverter { parent::__construct($langobj, $maincode, $variants, $variantfallbacks, - $markup, $flags, $manualLevel); $names = array( @@ -117,7 +115,7 @@ class LanguageGan extends LanguageZh { $this->mConverter = new GanConverter( $this, 'gan', $variants, $variantfallbacks, - array(),array(), + array(), $ml); $wgHooks['ArticleSaveComplete'][] = $this->mConverter; @@ -137,32 +135,16 @@ class LanguageGan extends LanguageZh { } // word segmentation - function stripForSearch( $string ) { - wfProfileIn( __METHOD__ ); - - // eventually this should be a word segmentation - // for now just treat each character as a word - // @fixme only do this for Han characters... - $t = preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/", - " $1", $string); - - //always convert to gan-hans before indexing. it should be - //better to use gan-hans for search, since conversion from - //Traditional to Simplified is less ambiguous than the - //other way around - - $t = $this->mConverter->autoConvert($t, 'gan-hans'); - $t = parent::stripForSearch( $t ); - wfProfileOut( __METHOD__ ); - return $t; - + function normalizeForSearch( $string, $autoVariant = 'gan-hans' ) { + // LanguageZh::normalizeForSearch + return parent::normalizeForSearch( $string, $autoVariant ); } function convertForSearchResult( $termsArray ) { $terms = implode( '|', $termsArray ); + $terms = self::convertDoubleWidth( $terms ); $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) ); $ret = array_unique( explode('|', $terms) ); return $ret; } -}
\ No newline at end of file +} diff --git a/languages/classes/LanguageGd.php b/languages/classes/LanguageGd.php new file mode 100644 index 00000000..6c307f60 --- /dev/null +++ b/languages/classes/LanguageGd.php @@ -0,0 +1,35 @@ +<?php +/** Scots Gaelic (Gàidhlig) + * + * @ingroup Language + * + * @author Raimond Spekking + */ +class LanguageGd extends Language { + + /** + * Plural form transformations + * Based on this discussion: http://translatewiki.net/w/i.php?title=Portal_talk:Gd&oldid=1094065#%C3%80ireamhan + * + * $forms[0] - singular form (for 1) + * $forms[1] - dual form (for 2) + * $forms[2] - plural form 1 (for 3-10) + * $forms[3] - plural form 2 (for >= 11) + * + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 4 ); + + $count = abs( $count ); + if ( $count === 1 ) { + return $forms[0]; + } elseif ( $count === 2 ) { + return $forms[1]; + } elseif ( $count >= 3 && $count <= 10 ) { + return $forms[2]; + } else { + return $forms[3]; + } + } +} diff --git a/languages/classes/LanguageHi.php b/languages/classes/LanguageHi.php new file mode 100644 index 00000000..705c73bc --- /dev/null +++ b/languages/classes/LanguageHi.php @@ -0,0 +1,16 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageHi extends Language { + /** + * Use singular form for zero + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 2 ); + + return ($count <= 1) ? $forms[0] : $forms[1]; + } +} diff --git a/languages/classes/LanguageHr.php b/languages/classes/LanguageHr.php index 5fe6b3a7..d8e84876 100644 --- a/languages/classes/LanguageHr.php +++ b/languages/classes/LanguageHr.php @@ -8,6 +8,8 @@ class LanguageHr extends Language { function convertPlural( $count, $forms ) { if ( !count($forms) ) { return ''; } + // FIXME: CLDR defines 4 plural forms instead of 3. Plural for decimals is missing. 
+ // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html $forms = $this->preConvertPlural( $forms, 3 ); if ($count > 10 && floor(($count % 100) / 10) == 1) { diff --git a/languages/classes/LanguageJa.php b/languages/classes/LanguageJa.php index 72c06e19..4a24260b 100644 --- a/languages/classes/LanguageJa.php +++ b/languages/classes/LanguageJa.php @@ -6,15 +6,11 @@ * @ingroup Language */ class LanguageJa extends Language { - function stripForSearch( $string ) { - # MySQL fulltext index doesn't grok utf-8, so we - # need to fold cases and convert to hex - $s = $string; + function wordSegmentation( $string ) { + // Strip known punctuation ? + // $s = preg_replace( '/\xe3\x80[\x80-\xbf]/', '', $s ); # U3000-303f - # Strip known punctuation ? - #$s = preg_replace( '/\xe3\x80[\x80-\xbf]/', '', $s ); # U3000-303f - - # Space strings of like hiragana/katakana/kanji + // Space strings of like hiragana/katakana/kanji $hiragana = '(?:\xe3(?:\x81[\x80-\xbf]|\x82[\x80-\x9f]))'; # U3040-309f $katakana = '(?:\xe3(?:\x82[\xa0-\xbf]|\x83[\x80-\xbf]))'; # U30a0-30ff $kanji = '(?:\xe3[\x88-\xbf][\x80-\xbf]' @@ -22,14 +18,17 @@ class LanguageJa extends Language { . '|\xe9[\x80-\xa5][\x80-\xbf]' . 
'|\xe9\xa6[\x80-\x99])'; # U3200-9999 = \xe3\x88\x80-\xe9\xa6\x99 - $s = preg_replace( "/({$hiragana}+|{$katakana}+|{$kanji}+)/", ' $1 ', $s ); - - # Double-width roman characters: ff00-ff5f ~= 0020-007f - $s = preg_replace( '/\xef\xbc([\x80-\xbf])/e', 'chr((ord("$1") & 0x3f) + 0x20)', $s ); - $s = preg_replace( '/\xef\xbd([\x80-\x99])/e', 'chr((ord("$1") & 0x3f) + 0x60)', $s ); + $reg = "/({$hiragana}+|{$katakana}+|{$kanji}+)/"; + $s = self::insertSpace( $string, $reg ); + return $s; + } + function normalizeForSearch( $string ) { + // Double-width roman characters + $s = self::convertDoubleWidth( $string ); + # Do general case folding and UTF-8 armoring - return parent::stripForSearch( $s ); + return parent::normalizeForSearch( $s ); } # Italic is not appropriate for Japanese script diff --git a/languages/classes/LanguageKk.php b/languages/classes/LanguageKk.php index b358a709..318b82a8 100644 --- a/languages/classes/LanguageKk.php +++ b/languages/classes/LanguageKk.php @@ -21,10 +21,9 @@ class KkConverter extends LanguageConverter { function __construct($langobj, $maincode, $variants=array(), $variantfallbacks=array(), - $markup=array(), $flags = array()) { parent::__construct( $langobj, $maincode, - $variants, $variantfallbacks, $markup, $flags ); + $variants, $variantfallbacks, $flags ); // No point delaying this since they're in code. 
// Waiting until loadDefaultTables() means they never get loaded @@ -209,16 +208,6 @@ class KkConverter extends LanguageConverter { return $carray; } - // Do not convert content on talk pages - function parserConvert( $text, &$parser ){ - if(is_object($parser->getTitle() ) && $parser->getTitle()->isTalkPage()) - $this->mDoContentConvert=false; - else - $this->mDoContentConvert=true; - - return parent::parserConvert($text, $parser ); - } - /* * A function wrapper: * - if there is no selected variant, leave the link diff --git a/languages/classes/LanguageKu.php b/languages/classes/LanguageKu.php index 3b84a9b3..5ce68c57 100644 --- a/languages/classes/LanguageKu.php +++ b/languages/classes/LanguageKu.php @@ -132,16 +132,6 @@ class KuConverter extends LanguageConverter { ); } - // Do not convert content on talk pages - function parserConvert( $text, &$parser ){ - if(is_object($parser->getTitle() ) && $parser->getTitle()->isTalkPage()) - $this->mDoContentConvert=false; - else - $this->mDoContentConvert=true; - - return parent::parserConvert($text, $parser ); - } - /* * A function wrapper: * - if there is no selected variant, leave the link diff --git a/languages/classes/LanguageLn.php b/languages/classes/LanguageLn.php new file mode 100644 index 00000000..bcf5f6e3 --- /dev/null +++ b/languages/classes/LanguageLn.php @@ -0,0 +1,18 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageLn extends Language { + /** + * Use singular form for zero + * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ln + + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 2 ); + + return ($count <= 1) ? 
$forms[0] : $forms[1]; + } +} diff --git a/languages/classes/LanguageLv.php b/languages/classes/LanguageLv.php index 2c8d9f31..84974c37 100644 --- a/languages/classes/LanguageLv.php +++ b/languages/classes/LanguageLv.php @@ -23,6 +23,9 @@ class LanguageLv extends Language { */ function convertPlural( $count, $forms ) { if ( !count($forms) ) { return ''; } + + // FIXME: CLDR defines 3 plural forms instead of 2. Form for 0 is missing. + // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#lv $forms = $this->preConvertPlural( $forms, 2 ); return ( ( $count % 10 == 1 ) && ( $count % 100 != 11 ) ) ? $forms[0] : $forms[1]; diff --git a/languages/classes/LanguageMg.php b/languages/classes/LanguageMg.php new file mode 100644 index 00000000..8593a298 --- /dev/null +++ b/languages/classes/LanguageMg.php @@ -0,0 +1,16 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageMg extends Language { + /** + * Use singular form for zero + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 2 ); + + return ($count <= 1) ? 
$forms[0] : $forms[1]; + } +} diff --git a/languages/classes/LanguageMk.php b/languages/classes/LanguageMk.php new file mode 100644 index 00000000..268365df --- /dev/null +++ b/languages/classes/LanguageMk.php @@ -0,0 +1,21 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageMk extends Language { + /** + * Plural forms per + * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#mk + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 2 ); + + if ( $count % 10 === 1 && $count % 100 !== 11 ) { + return $forms[0]; + } else { + return $forms[1]; + } + } +} diff --git a/languages/classes/LanguageMl.php b/languages/classes/LanguageMl.php new file mode 100644 index 00000000..98b5fc96 --- /dev/null +++ b/languages/classes/LanguageMl.php @@ -0,0 +1,22 @@ +<?php + +class LanguageMl extends Language { + /** + * Temporary hack for the issue described at + * http://permalink.gmane.org/gmane.science.linguistics.wikipedia.technical/46396 + * Convert Unicode 5.0 style Malayalam input to Unicode 5.1. Similar to + * bug 9413. Also fixes miscellaneous problems due to mishandling of ZWJ, + * e.g. bug 11162. + * + * FIXME: This is language-specific for now only to avoid the negative + * performance impact of enabling it for all languages. 
+ */ + function normalize( $s ) { + global $wgFixMalayalamUnicode; + $s = parent::normalize( $s ); + if ( $wgFixMalayalamUnicode ) { + $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s ); + } + return $s; + } +} diff --git a/languages/classes/LanguageMo.php b/languages/classes/LanguageMo.php new file mode 100644 index 00000000..83cfafb8 --- /dev/null +++ b/languages/classes/LanguageMo.php @@ -0,0 +1,23 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageMo extends Language { + function convertPlural( $count, $forms ) { + // Plural rules per + // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#mo + if ( !count($forms) ) { return ''; } + + $forms = $this->preConvertPlural( $forms, 3 ); + + if ( $count == 1 ) { + $index = 0; + } elseif ( $count == 0 || ( $count % 100 != 0 && $count % 100 < 20 ) ) { // multiples of 100 take the "other" form + $index = 1; + } else { + $index = 2; + } + return $forms[$index]; + } +} diff --git a/languages/classes/LanguageNso.php b/languages/classes/LanguageNso.php new file mode 100644 index 00000000..1b56e76a --- /dev/null +++ b/languages/classes/LanguageNso.php @@ -0,0 +1,16 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageNso extends Language { + /** + * Use singular form for zero + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 2 ); + + return ($count <= 1) ? 
$forms[0] : $forms[1]; + } +} diff --git a/languages/classes/LanguagePl.php b/languages/classes/LanguagePl.php index 54314fab..bb798913 100644 --- a/languages/classes/LanguagePl.php +++ b/languages/classes/LanguagePl.php @@ -21,4 +21,12 @@ class LanguagePl extends Language { return $forms[2]; // plural genitive } } + + function commafy($_) { + if (!preg_match('/^\d{1,4}(\.\d+)?$/',$_)) { // escaped dot: only a literal decimal point may follow the 1-4 digits + return strrev((string)preg_replace('/(\d{3})(?=\d)(?!\d*\.)/','$1,',strrev($_))); + } else { + return $_; + } + } } diff --git a/languages/classes/LanguageRo.php b/languages/classes/LanguageRo.php new file mode 100644 index 00000000..928f79fb --- /dev/null +++ b/languages/classes/LanguageRo.php @@ -0,0 +1,23 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageRo extends Language { + function convertPlural( $count, $forms ) { + // Plural rules per + // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ro + if ( !count($forms) ) { return ''; } + + $forms = $this->preConvertPlural( $forms, 3 ); + + if ( $count == 1 ) { + $index = 0; + } elseif ( $count == 0 || ( $count % 100 != 0 && $count % 100 < 20 ) ) { // multiples of 100 take the "other" form + $index = 1; + } else { + $index = 2; + } + return $forms[$index]; + } +} diff --git a/languages/classes/LanguageRu.php b/languages/classes/LanguageRu.php index 5933b47a..bb737e84 100644 --- a/languages/classes/LanguageRu.php +++ b/languages/classes/LanguageRu.php @@ -76,6 +76,8 @@ class LanguageRu extends Language { //if no number with word, then use $form[0] for singular and $form[1] for plural or zero if( count($forms) === 2 ) return $count == 1 ? $forms[0] : $forms[1]; + // FIXME: CLDR defines 4 plural forms. Form with decimals missing. 
+ // See http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ru $forms = $this->preConvertPlural( $forms, 3 ); if ($count > 10 && floor(($count % 100) / 10) == 1) { diff --git a/languages/classes/LanguageSe.php b/languages/classes/LanguageSe.php new file mode 100644 index 00000000..aac943d6 --- /dev/null +++ b/languages/classes/LanguageSe.php @@ -0,0 +1,22 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageSe extends Language { + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + + // plural forms per http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#se + $forms = $this->preConvertPlural( $forms, 3 ); + + if ( $count == 1 ) { + $index = 0; + } elseif( $count == 2 ) { + $index = 1; + } else { + $index = 2; + } + return $forms[$index]; + } +} diff --git a/languages/classes/LanguageSh.php b/languages/classes/LanguageSh.php new file mode 100644 index 00000000..29f06f4f --- /dev/null +++ b/languages/classes/LanguageSh.php @@ -0,0 +1,29 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageSh extends Language { + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + + //if no number with word, then use $form[0] for singular and $form[1] for plural or zero + if( count($forms) === 2 ) return $count == 1 ? $forms[0] : $forms[1]; + + // FIXME: CLDR defines 4 plural forms. Form with decimals missing. 
+ // See http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#sh + $forms = $this->preConvertPlural( $forms, 3 ); + + if ($count > 10 && floor(($count % 100) / 10) == 1) { + return $forms[2]; + } else { + switch ($count % 10) { + case 1: return $forms[0]; + case 2: + case 3: + case 4: return $forms[1]; + default: return $forms[2]; + } + } + } +} diff --git a/languages/classes/LanguageSma.php b/languages/classes/LanguageSma.php new file mode 100644 index 00000000..5167fb08 --- /dev/null +++ b/languages/classes/LanguageSma.php @@ -0,0 +1,22 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageSma extends Language { + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + + // plural forms per http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#sma + $forms = $this->preConvertPlural( $forms, 3 ); + + if ( $count == 1 ) { + $index = 0; // $forms is zero-based: one, two, other + } elseif( $count == 2 ) { + $index = 1; + } else { + $index = 2; + } + return $forms[$index]; + } +} diff --git a/languages/classes/LanguageSr.php b/languages/classes/LanguageSr.php index 79a2ec25..693660fb 100644 --- a/languages/classes/LanguageSr.php +++ b/languages/classes/LanguageSr.php @@ -76,16 +76,6 @@ class SrConverter extends LanguageConverter { return $carray; } - // Do not convert content on talk pages - function parserConvert( $text, &$parser ){ - if(is_object($parser->getTitle() ) && $parser->getTitle()->isTalkPage()) - $this->mDoContentConvert=false; - else - $this->mDoContentConvert=true; - - return parent::parserConvert($text, $parser ); - } - /* * A function wrapper: * - if there is no selected variant, leave the link @@ -175,12 +165,34 @@ class LanguageSr extends LanguageSr_ec { 'sr-el' => 'sr', ); - $marker = array();//don't mess with these, leave them as they are $flags = array( 'S' => 'S', 'писмо' => 'S', 'pismo' => 'S', 'W' => 'W', 'реч' => 'W', 'reč' => 'W', 'ријеч' => 'W', 'riječ' => 'W' ); - $this->mConverter = 
new SrConverter($this, 'sr', $variants, $variantfallbacks, $marker, $flags); + $this->mConverter = new SrConverter($this, 'sr', $variants, $variantfallbacks, $flags); $wgHooks['ArticleSaveComplete'][] = $this->mConverter; } + + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + + //if no number with word, then use $form[0] for singular and $form[1] for plural or zero + if( count($forms) === 2 ) return $count == 1 ? $forms[0] : $forms[1]; + + // FIXME: CLDR defines 4 plural forms. Form with decimals missing. + // See http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ru + $forms = $this->preConvertPlural( $forms, 3 ); + + if ($count > 10 && floor(($count % 100) / 10) == 1) { + return $forms[2]; + } else { + switch ($count % 10) { + case 1: return $forms[0]; + case 2: + case 3: + case 4: return $forms[1]; + default: return $forms[2]; + } + } + } } diff --git a/languages/classes/LanguageTi.php b/languages/classes/LanguageTi.php new file mode 100644 index 00000000..1974915d --- /dev/null +++ b/languages/classes/LanguageTi.php @@ -0,0 +1,16 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageTi extends Language { + /** + * Use singular form for zero + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 2 ); + + return ($count <= 1) ? $forms[0] : $forms[1]; + } +} diff --git a/languages/classes/LanguageTl.php b/languages/classes/LanguageTl.php new file mode 100644 index 00000000..23b6ad6f --- /dev/null +++ b/languages/classes/LanguageTl.php @@ -0,0 +1,16 @@ +<?php +/** + * + * @ingroup Language + */ +class LanguageTl extends Language { + /** + * Use singular form for zero + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 2 ); + + return ($count <= 1) ? 
$forms[0] : $forms[1]; + } +} diff --git a/languages/classes/LanguageTr.php b/languages/classes/LanguageTr.php index 8a2dee09..57b0fd07 100644 --- a/languages/classes/LanguageTr.php +++ b/languages/classes/LanguageTr.php @@ -7,7 +7,7 @@ */ class LanguageTr extends Language { function ucfirst ( $string ) { - if ( $string[0] == 'i' ) { + if ( !empty($string) && $string[0] == 'i' ) { return 'İ' . substr( $string, 1 ); } else { return parent::ucfirst( $string ); diff --git a/languages/classes/LanguageUk.php b/languages/classes/LanguageUk.php index dbebb134..4b1dfde5 100644 --- a/languages/classes/LanguageUk.php +++ b/languages/classes/LanguageUk.php @@ -62,6 +62,8 @@ class LanguageUk extends Language { //if no number with word, then use $form[0] for singular and $form[1] for plural or zero if( count($forms) === 2 ) return $count == 1 ? $forms[0] : $forms[1]; + // FIXME: CLDR defines 4 plural forms. Form for decimals is missing. + // See http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#uk $forms = $this->preConvertPlural( $forms, 3 ); if ($count > 10 && floor(($count % 100) / 10) == 1) { diff --git a/languages/classes/LanguageWa.php b/languages/classes/LanguageWa.php index d97b2026..4a4296ca 100644 --- a/languages/classes/LanguageWa.php +++ b/languages/classes/LanguageWa.php @@ -9,14 +9,22 @@ # k' i gn a. class LanguageWa extends Language { + /** + * Use singular form for zero + */ + function convertPlural( $count, $forms ) { + if ( !count($forms) ) { return ''; } + $forms = $this->preConvertPlural( $forms, 2 ); + + return ($count <= 1) ? 
$forms[0] : $forms[1]; + } + ### ### Dates in Walloon are "1î d' <monthname>" for 1st of the month, ### "<day> di <monthname>" for months starting by a consoun, and ### "<day> d' <monthname>" for months starting with a vowel ### function date( $ts, $adj = false, $format = true, $tc = false ) { - global $wgUser; - if ( $adj ) { $ts = $this->userAdjust( $ts, $tc ); } $datePreference = $this->dateFormat( $format ); diff --git a/languages/classes/LanguageYue.php b/languages/classes/LanguageYue.php index fc7f233c..6581d788 100644 --- a/languages/classes/LanguageYue.php +++ b/languages/classes/LanguageYue.php @@ -3,19 +3,30 @@ * @ingroup Language */ class LanguageYue extends Language { - function stripForSearch( $string ) { + function hasWordBreaks() { + return false; + } + + /** + * Eventually this should be a word segmentation; + * for now just treat each character as a word. + * @todo Fixme: only do this for Han characters... + */ + function wordSegmentation( $string ) { + $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/"; + $s = self::insertSpace( $string, $reg ); + return $s; + } + + function normalizeForSearch( $string ) { wfProfileIn( __METHOD__ ); - // eventually this should be a word segmentation - // for now just treat each character as a word - // @fixme only do this for Han characters... 
- $t = preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/", - " $1", $string); + // Double-width roman characters + $s = self::convertDoubleWidth( $string ); + $s = trim( $s ); + $s = parent::normalizeForSearch( $s ); - // Do general case folding and UTF-8 armoring - $t = parent::stripForSearch( $t ); wfProfileOut( __METHOD__ ); - return $t; + return $s; } } diff --git a/languages/classes/LanguageZh.php b/languages/classes/LanguageZh.php index cbb748c3..4a73c665 100644 --- a/languages/classes/LanguageZh.php +++ b/languages/classes/LanguageZh.php @@ -11,7 +11,6 @@ class ZhConverter extends LanguageConverter { function __construct($langobj, $maincode, $variants=array(), $variantfallbacks=array(), - $markup=array(), $flags = array(), $manualLevel = array() ) { $this->mDescCodeSep = ':'; @@ -19,7 +18,6 @@ class ZhConverter extends LanguageConverter { parent::__construct($langobj, $maincode, $variants, $variantfallbacks, - $markup, $flags, $manualLevel); $names = array( @@ -133,6 +131,7 @@ class LanguageZh extends LanguageZh_hans { parent::__construct(); $variants = array('zh','zh-hans','zh-hant','zh-cn','zh-hk','zh-mo','zh-my','zh-sg','zh-tw'); + $variantfallbacks = array( 'zh' => array('zh-hans','zh-hant','zh-cn','zh-tw','zh-hk','zh-sg','zh-mo','zh-my'), 'zh-hans' => array('zh-cn','zh-sg','zh-my'), @@ -152,7 +151,7 @@ class LanguageZh extends LanguageZh_hans { $this->mConverter = new ZhConverter( $this, 'zh', $variants, $variantfallbacks, - array(),array(), + array(), $ml); $wgHooks['ArticleSaveComplete'][] = $this->mConverter; @@ -171,31 +170,31 @@ class LanguageZh extends LanguageZh_hans { "\"$1\"", $text); } - // word segmentation - function stripForSearch( $string ) { + /** + * auto convert to zh-hans and normalize special characters. 
+ * + * @param $string String + * @param $autoVariant String, default to 'zh-hans' + * @return String + */ + function normalizeForSearch( $string, $autoVariant = 'zh-hans' ) { wfProfileIn( __METHOD__ ); - // eventually this should be a word segmentation - // for now just treat each character as a word - // @fixme only do this for Han characters... - $t = preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/", - " $1", $string); - - //always convert to zh-hans before indexing. it should be - //better to use zh-hans for search, since conversion from - //Traditional to Simplified is less ambiguous than the - //other way around - - $t = $this->mConverter->autoConvert($t, 'zh-hans'); - $t = parent::stripForSearch( $t ); + // always convert to zh-hans before indexing. it should be + // better to use zh-hans for search, since conversion from + // Traditional to Simplified is less ambiguous than the + // other way around + $s = $this->mConverter->autoConvert( $string, $autoVariant ); + // LanguageZh_hans::normalizeForSearch + $s = parent::normalizeForSearch( $s ); wfProfileOut( __METHOD__ ); - return $t; + return $s; } function convertForSearchResult( $termsArray ) { $terms = implode( '|', $termsArray ); + $terms = self::convertDoubleWidth( $terms ); $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) ); $ret = array_unique( explode('|', $terms) ); return $ret; diff --git a/languages/classes/LanguageZh_hans.php b/languages/classes/LanguageZh_hans.php index 983dd485..5b03d731 100644 --- a/languages/classes/LanguageZh_hans.php +++ b/languages/classes/LanguageZh_hans.php @@ -4,21 +4,30 @@ * @ingroup Language */ class LanguageZh_hans extends Language { - function stripForSearch( $string ) { - # MySQL fulltext index doesn't grok utf-8, so we - # need to fold cases and convert to hex - # we also separate characters as "words" - if( function_exists( 'mb_strtolower' ) ) { - return preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "' U8' . 
bin2hex( \"$1\" )", - mb_strtolower( $string ) ); - } else { - list( , $wikiLowerChars ) = Language::getCaseMaps(); - return preg_replace( - "/([\\xc0-\\xff][\\x80-\\xbf]*)/e", - "' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )", - $string ); - } + function hasWordBreaks() { + return false; } -} + + /** + * Eventually this should be a word segmentation; + * for now just treat each character as a word. + * @todo Fixme: only do this for Han characters... + */ + function wordSegmentation( $string ) { + $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/"; + $s = self::insertSpace( $string, $reg ); + return $s; + } + + function normalizeForSearch( $string ) { + wfProfileIn( __METHOD__ ); + + // Double-width roman characters + $s = self::convertDoubleWidth( $string ); + $s = trim( $s ); + $s = parent::normalizeForSearch( $s ); + + wfProfileOut( __METHOD__ ); + return $s; + } +}
\ No newline at end of file |