summaryrefslogtreecommitdiff
path: root/languages/classes
diff options
context:
space:
mode:
Diffstat (limited to 'languages/classes')
-rw-r--r--languages/classes/LanguageAm.php16
-rw-r--r--languages/classes/LanguageAr.php31
-rw-r--r--languages/classes/LanguageBe.php3
-rw-r--r--languages/classes/LanguageBe_tarask.php3
-rw-r--r--languages/classes/LanguageBh.php16
-rw-r--r--languages/classes/LanguageBs.php2
-rw-r--r--languages/classes/LanguageCy.php3
-rw-r--r--languages/classes/LanguageEo.php3
-rw-r--r--languages/classes/LanguageGa.php16
-rw-r--r--languages/classes/LanguageGan.php30
-rw-r--r--languages/classes/LanguageGd.php35
-rw-r--r--languages/classes/LanguageHi.php16
-rw-r--r--languages/classes/LanguageHr.php2
-rw-r--r--languages/classes/LanguageJa.php27
-rw-r--r--languages/classes/LanguageKk.php13
-rw-r--r--languages/classes/LanguageKu.php10
-rw-r--r--languages/classes/LanguageLn.php18
-rw-r--r--languages/classes/LanguageLv.php3
-rw-r--r--languages/classes/LanguageMg.php16
-rw-r--r--languages/classes/LanguageMk.php21
-rw-r--r--languages/classes/LanguageMl.php22
-rw-r--r--languages/classes/LanguageMo.php23
-rw-r--r--languages/classes/LanguageNso.php16
-rw-r--r--languages/classes/LanguagePl.php8
-rw-r--r--languages/classes/LanguageRo.php23
-rw-r--r--languages/classes/LanguageRu.php2
-rw-r--r--languages/classes/LanguageSe.php22
-rw-r--r--languages/classes/LanguageSh.php29
-rw-r--r--languages/classes/LanguageSma.php22
-rw-r--r--languages/classes/LanguageSr.php36
-rw-r--r--languages/classes/LanguageTi.php16
-rw-r--r--languages/classes/LanguageTl.php16
-rw-r--r--languages/classes/LanguageTr.php2
-rw-r--r--languages/classes/LanguageUk.php2
-rw-r--r--languages/classes/LanguageWa.php12
-rw-r--r--languages/classes/LanguageYue.php31
-rw-r--r--languages/classes/LanguageZh.php39
-rw-r--r--languages/classes/LanguageZh_hans.php43
38 files changed, 515 insertions, 133 deletions
diff --git a/languages/classes/LanguageAm.php b/languages/classes/LanguageAm.php
new file mode 100644
index 00000000..cf31c709
--- /dev/null
+++ b/languages/classes/LanguageAm.php
@@ -0,0 +1,16 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection, so that zero is treated like one.
+ * @ingroup Language
+ */
+class LanguageAm extends Language {
+ /**
+ * Use singular form for zero: $forms[0] when $count <= 1, $forms[1] otherwise; '' when $forms is empty.
+ */
+ function convertPlural( $count, $forms ) {
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 2 ); // helper not shown here; presumably normalizes $forms to two entries - confirm
+
+ return ($count <= 1) ? $forms[0] : $forms[1];
+ }
+}
diff --git a/languages/classes/LanguageAr.php b/languages/classes/LanguageAr.php
index 01c921a0..70a53f86 100644
--- a/languages/classes/LanguageAr.php
+++ b/languages/classes/LanguageAr.php
@@ -6,22 +6,39 @@
* @author Niklas Laxström
*/
class LanguageAr extends Language {
-
function convertPlural( $count, $forms ) {
if ( !count($forms) ) { return ''; }
- $forms = $this->preConvertPlural( $forms, 5 );
+ $forms = $this->preConvertPlural( $forms, 6 );
- if ( $count == 1 ) {
+ if ( $count == 0 ) {
$index = 0;
- } elseif( $count == 2 ) {
+ } elseif ( $count == 1 ) {
$index = 1;
- } elseif( $count < 11 && $count > 2 ) {
+ } elseif( $count == 2 ) {
$index = 2;
- } elseif( $count % 100 == 0) {
+ } elseif( $count % 100 >= 3 && $count % 100 <= 10 ) {
$index = 3;
- } else {
+ } elseif( $count % 100 >= 11 && $count % 100 <= 99 ) {
$index = 4;
+ } else {
+ $index = 5;
}
return $forms[$index];
}
+
+ /**
+ * Temporary hack for bug 9413: replace Arabic presentation forms with their
+ * standard equivalents.
+ *
+ * Runs parent::normalize() first; the extra pass is applied only when $wgFixArabicUnicode is truthy.
+ * FIXME: This is language-specific for now only to avoid the negative
+ * performance impact of enabling it for all languages.
+ */
+ function normalize( $s ) {
+ global $wgFixArabicUnicode;
+ $s = parent::normalize( $s );
+ if ( $wgFixArabicUnicode ) {
+ $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s ); // helper not shown here; presumably applies the replacement pairs stored in that file - confirm
+ }
+ return $s;
+ }
}
diff --git a/languages/classes/LanguageBe.php b/languages/classes/LanguageBe.php
index 1468daff..b86a5f30 100644
--- a/languages/classes/LanguageBe.php
+++ b/languages/classes/LanguageBe.php
@@ -7,7 +7,6 @@
* @ingroup Language
*
* @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
- * @bug 1638, 2135
* @link http://be.wikipedia.org/wiki/Talk:LanguageBe.php
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
* @license http://www.gnu.org/copyleft/fdl.html GNU Free Documentation License
@@ -17,6 +16,8 @@ class LanguageBe extends Language {
function convertPlural( $count, $forms ) {
if ( !count($forms) ) { return ''; }
+ // FIXME: CLDR defines 4 plural forms instead of 3
+ // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
$forms = $this->preConvertPlural( $forms, 3 );
if ($count > 10 && floor(($count % 100) / 10) == 1) {
diff --git a/languages/classes/LanguageBe_tarask.php b/languages/classes/LanguageBe_tarask.php
index 358cb486..96ad9467 100644
--- a/languages/classes/LanguageBe_tarask.php
+++ b/languages/classes/LanguageBe_tarask.php
@@ -4,7 +4,6 @@
* @ingroup Language
*
* @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
- * @bug 1638, 2135
* @link http://be.wikipedia.org/wiki/Talk:LanguageBe.php
* @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
* @license http://www.gnu.org/copyleft/fdl.html GNU Free Documentation License
@@ -25,6 +24,8 @@ class LanguageBe_tarask extends Language {
//if no number with word, then use $form[0] for singular and $form[1] for plural or zero
if( count($forms) === 2 ) return $count == 1 ? $forms[0] : $forms[1];
+ // FIXME: CLDR defines 4 plural forms instead of 3
+ // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
$forms = $this->preConvertPlural( $forms, 3 );
if ($count > 10 && floor(($count % 100) / 10) == 1) {
diff --git a/languages/classes/LanguageBh.php b/languages/classes/LanguageBh.php
new file mode 100644
index 00000000..80119ec7
--- /dev/null
+++ b/languages/classes/LanguageBh.php
@@ -0,0 +1,16 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection, so that zero is treated like one.
+ * @ingroup Language
+ */
+class LanguageBh extends Language {
+ /**
+ * Use singular form for zero: $forms[0] when $count <= 1, $forms[1] otherwise; '' when $forms is empty.
+ */
+ function convertPlural( $count, $forms ) {
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 2 ); // helper not shown here; presumably normalizes $forms to two entries - confirm
+
+ return ($count <= 1) ? $forms[0] : $forms[1];
+ }
+}
diff --git a/languages/classes/LanguageBs.php b/languages/classes/LanguageBs.php
index 24f6adde..df522f83 100644
--- a/languages/classes/LanguageBs.php
+++ b/languages/classes/LanguageBs.php
@@ -10,6 +10,8 @@ class LanguageBs extends Language {
if ( !count($forms) ) { return ''; }
$forms = $this->preConvertPlural( $forms, 3 );
+ // FIXME: CLDR defines 4 plural forms instead of 3. Plural for decimals is missing.
+ // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
if ($count > 10 && floor(($count % 100) / 10) == 1) {
return $forms[2];
} else {
diff --git a/languages/classes/LanguageCy.php b/languages/classes/LanguageCy.php
index 486f7a7c..9a395393 100644
--- a/languages/classes/LanguageCy.php
+++ b/languages/classes/LanguageCy.php
@@ -8,6 +8,9 @@
class LanguageCy extends Language {
function convertPlural( $count, $forms ) {
if ( !count($forms) ) { return ''; }
+
+ // FIXME: CLDR defines 4 plural forms; very different, actually.
+ // See http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#cy
$forms = $this->preConvertPlural( $forms, 6 );
$count = abs( $count );
if ( $count >= 0 && $count <= 3 ) {
diff --git a/languages/classes/LanguageEo.php b/languages/classes/LanguageEo.php
index c58d424f..822a43f7 100644
--- a/languages/classes/LanguageEo.php
+++ b/languages/classes/LanguageEo.php
@@ -6,7 +6,6 @@
*/
class LanguageEo extends Language {
function iconv( $in, $out, $string ) {
- # For most languages, this is a wrapper for iconv
# Por multaj lingvoj, ĉi tiu nur voku la sisteman funkcion iconv()
# Ni ankaŭ konvertu X-sistemajn surogotajn
if( strcasecmp( $in, 'x' ) == 0 and strcasecmp( $out, 'utf-8' ) == 0) {
@@ -42,7 +41,7 @@ class LanguageEo extends Language {
return preg_replace( '/((?:[cghjsu]|\xc4[\x88\x89\x9c\x9d\xa4\xa5\xb4\xb5]'.
'|\xc5[\x9c\x9d\xac\xad])x*)/ei', 'strtr( "$1", $ux )', $string );
}
- return iconv( $in, $out, $string );
+ return parent::iconv( $in, $out, $string );
}
function checkTitleEncoding( $s ) {
diff --git a/languages/classes/LanguageGa.php b/languages/classes/LanguageGa.php
index 2fdeed04..344fdc84 100644
--- a/languages/classes/LanguageGa.php
+++ b/languages/classes/LanguageGa.php
@@ -34,4 +34,20 @@ class LanguageGa extends Language {
}
return $word;
}
+
+ function convertPlural( $count, $forms ) { // picks one of three forms: for 1, for 2, for everything else
+ if ( !count($forms) ) { return ''; } // no forms supplied: nothing to choose from
+
+ // plural forms per http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ga
+ $forms = $this->preConvertPlural( $forms, 3 ); // helper not shown here; presumably normalizes $forms to three entries - confirm
+
+ if ( $count == 1 ) {
+ $index = 0;
+ } elseif( $count == 2 ) {
+ $index = 1;
+ } else {
+ $index = 2;
+ }
+ return $forms[$index];
+ }
}
diff --git a/languages/classes/LanguageGan.php b/languages/classes/LanguageGan.php
index f4541059..3c8b5fdb 100644
--- a/languages/classes/LanguageGan.php
+++ b/languages/classes/LanguageGan.php
@@ -11,7 +11,6 @@ class GanConverter extends LanguageConverter {
function __construct($langobj, $maincode,
$variants=array(),
$variantfallbacks=array(),
- $markup=array(),
$flags = array(),
$manualLevel = array() ) {
$this->mDescCodeSep = ':';
@@ -19,7 +18,6 @@ class GanConverter extends LanguageConverter {
parent::__construct($langobj, $maincode,
$variants,
$variantfallbacks,
- $markup,
$flags,
$manualLevel);
$names = array(
@@ -117,7 +115,7 @@ class LanguageGan extends LanguageZh {
$this->mConverter = new GanConverter( $this, 'gan',
$variants, $variantfallbacks,
- array(),array(),
+ array(),
$ml);
$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
@@ -137,32 +135,16 @@ class LanguageGan extends LanguageZh {
}
// word segmentation
- function stripForSearch( $string ) {
- wfProfileIn( __METHOD__ );
-
- // eventually this should be a word segmentation
- // for now just treat each character as a word
- // @fixme only do this for Han characters...
- $t = preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
- " $1", $string);
-
- //always convert to gan-hans before indexing. it should be
- //better to use gan-hans for search, since conversion from
- //Traditional to Simplified is less ambiguous than the
- //other way around
-
- $t = $this->mConverter->autoConvert($t, 'gan-hans');
- $t = parent::stripForSearch( $t );
- wfProfileOut( __METHOD__ );
- return $t;
-
+ function normalizeForSearch( $string, $autoVariant = 'gan-hans' ) {
+ // LanguageZh::normalizeForSearch
+ return parent::normalizeForSearch( $string, $autoVariant );
}
function convertForSearchResult( $termsArray ) {
$terms = implode( '|', $termsArray );
+ $terms = self::convertDoubleWidth( $terms );
$terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
$ret = array_unique( explode('|', $terms) );
return $ret;
}
-} \ No newline at end of file
+}
diff --git a/languages/classes/LanguageGd.php b/languages/classes/LanguageGd.php
new file mode 100644
index 00000000..6c307f60
--- /dev/null
+++ b/languages/classes/LanguageGd.php
@@ -0,0 +1,35 @@
+<?php
+/** Scots Gaelic (Gàidhlig)
+ *
+ * @ingroup Language
+ *
+ * @author Raimond Spekking
+ */
+class LanguageGd extends Language {
+
+ /**
+ * Plural form transformations
+ * Based on this discussion: http://translatewiki.net/w/i.php?title=Portal_talk:Gd&oldid=1094065#%C3%80ireamhan
+ *
+ * $forms[0] - singular form (for 1)
+ * $forms[1] - dual form (for 2)
+ * $forms[2] - plural form 1 (for 3-10)
+ * $forms[3] - plural form 2 (for >= 11, and for anything else such as 0)
+ * The sign of $count is ignored; returns '' when $forms is empty.
+ */
+ function convertPlural( $count, $forms ) {
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 4 );
+
+ $count = abs( $count ); // abs() yields a float for float input, so compare loosely below
+ if ( $count == 1 ) {
+ return $forms[0];
+ } elseif ( $count == 2 ) {
+ return $forms[1];
+ } elseif ( $count >= 3 && $count <= 10 ) {
+ return $forms[2];
+ } else {
+ return $forms[3];
+ }
+ }
+}
diff --git a/languages/classes/LanguageHi.php b/languages/classes/LanguageHi.php
new file mode 100644
index 00000000..705c73bc
--- /dev/null
+++ b/languages/classes/LanguageHi.php
@@ -0,0 +1,16 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection, so that zero is treated like one.
+ * @ingroup Language
+ */
+class LanguageHi extends Language {
+ /**
+ * Use singular form for zero: $forms[0] when $count <= 1, $forms[1] otherwise; '' when $forms is empty.
+ */
+ function convertPlural( $count, $forms ) {
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 2 ); // helper not shown here; presumably normalizes $forms to two entries - confirm
+
+ return ($count <= 1) ? $forms[0] : $forms[1];
+ }
+}
diff --git a/languages/classes/LanguageHr.php b/languages/classes/LanguageHr.php
index 5fe6b3a7..d8e84876 100644
--- a/languages/classes/LanguageHr.php
+++ b/languages/classes/LanguageHr.php
@@ -8,6 +8,8 @@ class LanguageHr extends Language {
function convertPlural( $count, $forms ) {
if ( !count($forms) ) { return ''; }
+ // FIXME: CLDR defines 4 plural forms instead of 3. Plural for decimals is missing.
+ // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
$forms = $this->preConvertPlural( $forms, 3 );
if ($count > 10 && floor(($count % 100) / 10) == 1) {
diff --git a/languages/classes/LanguageJa.php b/languages/classes/LanguageJa.php
index 72c06e19..4a24260b 100644
--- a/languages/classes/LanguageJa.php
+++ b/languages/classes/LanguageJa.php
@@ -6,15 +6,11 @@
* @ingroup Language
*/
class LanguageJa extends Language {
- function stripForSearch( $string ) {
- # MySQL fulltext index doesn't grok utf-8, so we
- # need to fold cases and convert to hex
- $s = $string;
+ function wordSegmentation( $string ) {
+ // Strip known punctuation ?
+ // $s = preg_replace( '/\xe3\x80[\x80-\xbf]/', '', $s ); # U3000-303f
- # Strip known punctuation ?
- #$s = preg_replace( '/\xe3\x80[\x80-\xbf]/', '', $s ); # U3000-303f
-
- # Space strings of like hiragana/katakana/kanji
+ // Space strings of like hiragana/katakana/kanji
$hiragana = '(?:\xe3(?:\x81[\x80-\xbf]|\x82[\x80-\x9f]))'; # U3040-309f
$katakana = '(?:\xe3(?:\x82[\xa0-\xbf]|\x83[\x80-\xbf]))'; # U30a0-30ff
$kanji = '(?:\xe3[\x88-\xbf][\x80-\xbf]'
@@ -22,14 +18,17 @@ class LanguageJa extends Language {
. '|\xe9[\x80-\xa5][\x80-\xbf]'
. '|\xe9\xa6[\x80-\x99])';
# U3200-9999 = \xe3\x88\x80-\xe9\xa6\x99
- $s = preg_replace( "/({$hiragana}+|{$katakana}+|{$kanji}+)/", ' $1 ', $s );
-
- # Double-width roman characters: ff00-ff5f ~= 0020-007f
- $s = preg_replace( '/\xef\xbc([\x80-\xbf])/e', 'chr((ord("$1") & 0x3f) + 0x20)', $s );
- $s = preg_replace( '/\xef\xbd([\x80-\x99])/e', 'chr((ord("$1") & 0x3f) + 0x60)', $s );
+ $reg = "/({$hiragana}+|{$katakana}+|{$kanji}+)/";
+ $s = self::insertSpace( $string, $reg );
+ return $s;
+ }
+ function normalizeForSearch( $string ) {
+ // Double-width roman characters
+ $s = self::convertDoubleWidth( $string );
+
# Do general case folding and UTF-8 armoring
- return parent::stripForSearch( $s );
+ return parent::normalizeForSearch( $s );
}
# Italic is not appropriate for Japanese script
diff --git a/languages/classes/LanguageKk.php b/languages/classes/LanguageKk.php
index b358a709..318b82a8 100644
--- a/languages/classes/LanguageKk.php
+++ b/languages/classes/LanguageKk.php
@@ -21,10 +21,9 @@ class KkConverter extends LanguageConverter {
function __construct($langobj, $maincode,
$variants=array(),
$variantfallbacks=array(),
- $markup=array(),
$flags = array()) {
parent::__construct( $langobj, $maincode,
- $variants, $variantfallbacks, $markup, $flags );
+ $variants, $variantfallbacks, $flags );
// No point delaying this since they're in code.
// Waiting until loadDefaultTables() means they never get loaded
@@ -209,16 +208,6 @@ class KkConverter extends LanguageConverter {
return $carray;
}
- // Do not convert content on talk pages
- function parserConvert( $text, &$parser ){
- if(is_object($parser->getTitle() ) && $parser->getTitle()->isTalkPage())
- $this->mDoContentConvert=false;
- else
- $this->mDoContentConvert=true;
-
- return parent::parserConvert($text, $parser );
- }
-
/*
* A function wrapper:
* - if there is no selected variant, leave the link
diff --git a/languages/classes/LanguageKu.php b/languages/classes/LanguageKu.php
index 3b84a9b3..5ce68c57 100644
--- a/languages/classes/LanguageKu.php
+++ b/languages/classes/LanguageKu.php
@@ -132,16 +132,6 @@ class KuConverter extends LanguageConverter {
);
}
- // Do not convert content on talk pages
- function parserConvert( $text, &$parser ){
- if(is_object($parser->getTitle() ) && $parser->getTitle()->isTalkPage())
- $this->mDoContentConvert=false;
- else
- $this->mDoContentConvert=true;
-
- return parent::parserConvert($text, $parser );
- }
-
/*
* A function wrapper:
* - if there is no selected variant, leave the link
diff --git a/languages/classes/LanguageLn.php b/languages/classes/LanguageLn.php
new file mode 100644
index 00000000..bcf5f6e3
--- /dev/null
+++ b/languages/classes/LanguageLn.php
@@ -0,0 +1,18 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection, so that zero is treated like one.
+ * @ingroup Language
+ */
+class LanguageLn extends Language {
+ /**
+ * Use singular form for zero: $forms[0] when $count <= 1, $forms[1] otherwise; '' when $forms is empty.
+ * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ln
+ *
+ */
+ function convertPlural( $count, $forms ) {
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 2 ); // helper not shown here; presumably normalizes $forms to two entries - confirm
+
+ return ($count <= 1) ? $forms[0] : $forms[1];
+ }
+}
diff --git a/languages/classes/LanguageLv.php b/languages/classes/LanguageLv.php
index 2c8d9f31..84974c37 100644
--- a/languages/classes/LanguageLv.php
+++ b/languages/classes/LanguageLv.php
@@ -23,6 +23,9 @@ class LanguageLv extends Language {
*/
function convertPlural( $count, $forms ) {
if ( !count($forms) ) { return ''; }
+
+ // FIXME: CLDR defines 3 plural forms instead of 2. Form for 0 is missing.
+ // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#lv
$forms = $this->preConvertPlural( $forms, 2 );
return ( ( $count % 10 == 1 ) && ( $count % 100 != 11 ) ) ? $forms[0] : $forms[1];
diff --git a/languages/classes/LanguageMg.php b/languages/classes/LanguageMg.php
new file mode 100644
index 00000000..8593a298
--- /dev/null
+++ b/languages/classes/LanguageMg.php
@@ -0,0 +1,16 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection, so that zero is treated like one.
+ * @ingroup Language
+ */
+class LanguageMg extends Language {
+ /**
+ * Use singular form for zero: $forms[0] when $count <= 1, $forms[1] otherwise; '' when $forms is empty.
+ */
+ function convertPlural( $count, $forms ) {
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 2 ); // helper not shown here; presumably normalizes $forms to two entries - confirm
+
+ return ($count <= 1) ? $forms[0] : $forms[1];
+ }
+}
diff --git a/languages/classes/LanguageMk.php b/languages/classes/LanguageMk.php
new file mode 100644
index 00000000..268365df
--- /dev/null
+++ b/languages/classes/LanguageMk.php
@@ -0,0 +1,21 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection (two forms).
+ * @ingroup Language
+ */
+class LanguageMk extends Language {
+ /**
+ * Plural forms per
+ * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#mk
+ */
+ function convertPlural( $count, $forms ) { // returns '' when $forms is empty
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 2 ); // helper not shown here; presumably normalizes $forms to two entries - confirm
+
+ if ( $count % 10 === 1 && $count % 100 !== 11 ) { // last digit is 1 but the last two digits are not 11
+ return $forms[0];
+ } else {
+ return $forms[1];
+ }
+ }
+}
diff --git a/languages/classes/LanguageMl.php b/languages/classes/LanguageMl.php
new file mode 100644
index 00000000..98b5fc96
--- /dev/null
+++ b/languages/classes/LanguageMl.php
@@ -0,0 +1,22 @@
+<?php
+
+class LanguageMl extends Language {
+ /**
+ * Temporary hack for the issue described at
+ * http://permalink.gmane.org/gmane.science.linguistics.wikipedia.technical/46396
+ * Convert Unicode 5.0 style Malayalam input to Unicode 5.1. Similar to
+ * bug 9413. Also fixes miscellaneous problems due to mishandling of ZWJ,
+ * e.g. bug 11162.
+ * Runs parent::normalize() first; the extra pass is applied only when $wgFixMalayalamUnicode is truthy.
+ * FIXME: This is language-specific for now only to avoid the negative
+ * performance impact of enabling it for all languages.
+ */
+ function normalize( $s ) {
+ global $wgFixMalayalamUnicode;
+ $s = parent::normalize( $s );
+ if ( $wgFixMalayalamUnicode ) {
+ $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s ); // helper not shown here; presumably applies the replacement pairs stored in that file - confirm
+ }
+ return $s;
+ }
+}
diff --git a/languages/classes/LanguageMo.php b/languages/classes/LanguageMo.php
new file mode 100644
index 00000000..83cfafb8
--- /dev/null
+++ b/languages/classes/LanguageMo.php
@@ -0,0 +1,23 @@
+<?php
+/**
+ * Language subclass choosing among three plural forms: 1 / 0 or n mod 100 in 1..19 / everything else.
+ * @ingroup Language
+ */
+class LanguageMo extends Language {
+ function convertPlural( $count, $forms ) {
+ // Plural rules per
+ // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#mo
+ if ( !count($forms) ) { return ''; }
+
+ $forms = $this->preConvertPlural( $forms, 3 );
+
+ if ( $count == 1 ) {
+ $index = 0;
+ } elseif ( $count == 0 || ( $count % 100 > 0 && $count % 100 < 20 ) ) { // remainder must be 1..19, so 100, 200, ... fall through to the last form
+ $index = 1;
+ } else {
+ $index = 2;
+ }
+ return $forms[$index];
+ }
+}
diff --git a/languages/classes/LanguageNso.php b/languages/classes/LanguageNso.php
new file mode 100644
index 00000000..1b56e76a
--- /dev/null
+++ b/languages/classes/LanguageNso.php
@@ -0,0 +1,16 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection, so that zero is treated like one.
+ * @ingroup Language
+ */
+class LanguageNso extends Language {
+ /**
+ * Use singular form for zero: $forms[0] when $count <= 1, $forms[1] otherwise; '' when $forms is empty.
+ */
+ function convertPlural( $count, $forms ) {
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 2 ); // helper not shown here; presumably normalizes $forms to two entries - confirm
+
+ return ($count <= 1) ? $forms[0] : $forms[1];
+ }
+}
diff --git a/languages/classes/LanguagePl.php b/languages/classes/LanguagePl.php
index 54314fab..bb798913 100644
--- a/languages/classes/LanguagePl.php
+++ b/languages/classes/LanguagePl.php
@@ -21,4 +21,12 @@ class LanguagePl extends Language {
return $forms[2]; // plural genitive
}
}
+
+ function commafy($_) { // inserts ',' between groups of three integer digits; numbers with at most four integer digits are returned unchanged
+ if (!preg_match('/^\d{1,4}(\.\d+)?$/',$_)) { // the dot is escaped so that only a real decimal point is accepted here
+ return strrev((string)preg_replace('/(\d{3})(?=\d)(?!\d*\.)/','$1,',strrev($_))); // reversed so groups are counted from the right; the negative lookahead leaves the fractional digits alone
+ } else {
+ return $_;
+ }
+ }
}
diff --git a/languages/classes/LanguageRo.php b/languages/classes/LanguageRo.php
new file mode 100644
index 00000000..928f79fb
--- /dev/null
+++ b/languages/classes/LanguageRo.php
@@ -0,0 +1,23 @@
+<?php
+/**
+ * Language subclass choosing among three plural forms: 1 / 0 or n mod 100 in 1..19 / everything else.
+ * @ingroup Language
+ */
+class LanguageRo extends Language {
+ function convertPlural( $count, $forms ) {
+ // Plural rules per
+ // http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ro
+ if ( !count($forms) ) { return ''; }
+
+ $forms = $this->preConvertPlural( $forms, 3 );
+
+ if ( $count == 1 ) {
+ $index = 0;
+ } elseif ( $count == 0 || ( $count % 100 > 0 && $count % 100 < 20 ) ) { // remainder must be 1..19, so 100, 200, ... fall through to the last form
+ $index = 1;
+ } else {
+ $index = 2;
+ }
+ return $forms[$index];
+ }
+}
diff --git a/languages/classes/LanguageRu.php b/languages/classes/LanguageRu.php
index 5933b47a..bb737e84 100644
--- a/languages/classes/LanguageRu.php
+++ b/languages/classes/LanguageRu.php
@@ -76,6 +76,8 @@ class LanguageRu extends Language {
//if no number with word, then use $form[0] for singular and $form[1] for plural or zero
if( count($forms) === 2 ) return $count == 1 ? $forms[0] : $forms[1];
+ // FIXME: CLDR defines 4 plural forms. Form with decimals missing.
+ // See http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ru
$forms = $this->preConvertPlural( $forms, 3 );
if ($count > 10 && floor(($count % 100) / 10) == 1) {
diff --git a/languages/classes/LanguageSe.php b/languages/classes/LanguageSe.php
new file mode 100644
index 00000000..aac943d6
--- /dev/null
+++ b/languages/classes/LanguageSe.php
@@ -0,0 +1,22 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection: one form for 1, one for 2, one for everything else.
+ * @ingroup Language
+ */
+class LanguageSe extends Language {
+ function convertPlural( $count, $forms ) { // returns '' when $forms is empty
+ if ( !count($forms) ) { return ''; }
+
+ // plural forms per http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#se
+ $forms = $this->preConvertPlural( $forms, 3 ); // helper not shown here; presumably normalizes $forms to three entries - confirm
+
+ if ( $count == 1 ) {
+ $index = 0;
+ } elseif( $count == 2 ) {
+ $index = 1;
+ } else {
+ $index = 2;
+ }
+ return $forms[$index];
+ }
+}
diff --git a/languages/classes/LanguageSh.php b/languages/classes/LanguageSh.php
new file mode 100644
index 00000000..29f06f4f
--- /dev/null
+++ b/languages/classes/LanguageSh.php
@@ -0,0 +1,29 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection (three forms chosen from the last digits of the count).
+ * @ingroup Language
+ */
+class LanguageSh extends Language {
+ function convertPlural( $count, $forms ) { // returns '' when $forms is empty
+ if ( !count($forms) ) { return ''; }
+
+ //if no number with word, then use $form[0] for singular and $form[1] for plural or zero
+ if( count($forms) === 2 ) return $count == 1 ? $forms[0] : $forms[1];
+
+ // FIXME: CLDR defines 4 plural forms. Form with decimals missing.
+ // See http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#sh
+ $forms = $this->preConvertPlural( $forms, 3 ); // helper not shown here; presumably normalizes $forms to three entries - confirm
+
+ if ($count > 10 && floor(($count % 100) / 10) == 1) { // tens digit is 1 (11..19, 111..119, ...): always the third form
+ return $forms[2];
+ } else {
+ switch ($count % 10) { // otherwise decide by the last digit
+ case 1: return $forms[0];
+ case 2:
+ case 3:
+ case 4: return $forms[1];
+ default: return $forms[2];
+ }
+ }
+ }
+}
diff --git a/languages/classes/LanguageSma.php b/languages/classes/LanguageSma.php
new file mode 100644
index 00000000..5167fb08
--- /dev/null
+++ b/languages/classes/LanguageSma.php
@@ -0,0 +1,22 @@
+<?php
+/**
+ * Language subclass choosing among three plural forms: one for 1, one for 2, one for everything else.
+ * @ingroup Language
+ */
+class LanguageSma extends Language {
+ function convertPlural( $count, $forms ) { // returns '' when $forms is empty
+ if ( !count($forms) ) { return ''; }
+
+ // plural forms per http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#sma
+ $forms = $this->preConvertPlural( $forms, 3 );
+
+ if ( $count == 1 ) {
+ $index = 0; // indexes are zero-based: three forms means keys 0..2
+ } elseif( $count == 2 ) {
+ $index = 1;
+ } else {
+ $index = 2;
+ }
+ return $forms[$index];
+ }
+}
diff --git a/languages/classes/LanguageSr.php b/languages/classes/LanguageSr.php
index 79a2ec25..693660fb 100644
--- a/languages/classes/LanguageSr.php
+++ b/languages/classes/LanguageSr.php
@@ -76,16 +76,6 @@ class SrConverter extends LanguageConverter {
return $carray;
}
- // Do not convert content on talk pages
- function parserConvert( $text, &$parser ){
- if(is_object($parser->getTitle() ) && $parser->getTitle()->isTalkPage())
- $this->mDoContentConvert=false;
- else
- $this->mDoContentConvert=true;
-
- return parent::parserConvert($text, $parser );
- }
-
/*
* A function wrapper:
* - if there is no selected variant, leave the link
@@ -175,12 +165,34 @@ class LanguageSr extends LanguageSr_ec {
'sr-el' => 'sr',
);
- $marker = array();//don't mess with these, leave them as they are
$flags = array(
'S' => 'S', 'писмо' => 'S', 'pismo' => 'S',
'W' => 'W', 'реч' => 'W', 'reč' => 'W', 'ријеч' => 'W', 'riječ' => 'W'
);
- $this->mConverter = new SrConverter($this, 'sr', $variants, $variantfallbacks, $marker, $flags);
+ $this->mConverter = new SrConverter($this, 'sr', $variants, $variantfallbacks, $flags);
$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
}
+
+ function convertPlural( $count, $forms ) { // returns '' when $forms is empty
+ if ( !count($forms) ) { return ''; }
+
+ //if no number with word, then use $form[0] for singular and $form[1] for plural or zero
+ if( count($forms) === 2 ) return $count == 1 ? $forms[0] : $forms[1];
+
+ // FIXME: CLDR defines 4 plural forms. Form with decimals missing.
+ // See http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#ru
+ $forms = $this->preConvertPlural( $forms, 3 ); // helper not shown here; presumably normalizes $forms to three entries - confirm
+
+ if ($count > 10 && floor(($count % 100) / 10) == 1) { // tens digit is 1 (11..19, 111..119, ...): always the third form
+ return $forms[2];
+ } else {
+ switch ($count % 10) { // otherwise decide by the last digit
+ case 1: return $forms[0];
+ case 2:
+ case 3:
+ case 4: return $forms[1];
+ default: return $forms[2];
+ }
+ }
+ }
}
diff --git a/languages/classes/LanguageTi.php b/languages/classes/LanguageTi.php
new file mode 100644
index 00000000..1974915d
--- /dev/null
+++ b/languages/classes/LanguageTi.php
@@ -0,0 +1,16 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection, so that zero is treated like one.
+ * @ingroup Language
+ */
+class LanguageTi extends Language {
+ /**
+ * Use singular form for zero: $forms[0] when $count <= 1, $forms[1] otherwise; '' when $forms is empty.
+ */
+ function convertPlural( $count, $forms ) {
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 2 ); // helper not shown here; presumably normalizes $forms to two entries - confirm
+
+ return ($count <= 1) ? $forms[0] : $forms[1];
+ }
+}
diff --git a/languages/classes/LanguageTl.php b/languages/classes/LanguageTl.php
new file mode 100644
index 00000000..23b6ad6f
--- /dev/null
+++ b/languages/classes/LanguageTl.php
@@ -0,0 +1,16 @@
+<?php
+/**
+ * Language subclass shown here overriding only plural selection, so that zero is treated like one.
+ * @ingroup Language
+ */
+class LanguageTl extends Language {
+ /**
+ * Use singular form for zero: $forms[0] when $count <= 1, $forms[1] otherwise; '' when $forms is empty.
+ */
+ function convertPlural( $count, $forms ) {
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 2 ); // helper not shown here; presumably normalizes $forms to two entries - confirm
+
+ return ($count <= 1) ? $forms[0] : $forms[1];
+ }
+}
diff --git a/languages/classes/LanguageTr.php b/languages/classes/LanguageTr.php
index 8a2dee09..57b0fd07 100644
--- a/languages/classes/LanguageTr.php
+++ b/languages/classes/LanguageTr.php
@@ -7,7 +7,7 @@
*/
class LanguageTr extends Language {
function ucfirst ( $string ) {
- if ( $string[0] == 'i' ) {
+ if ( !empty($string) && $string[0] == 'i' ) {
return 'İ' . substr( $string, 1 );
} else {
return parent::ucfirst( $string );
diff --git a/languages/classes/LanguageUk.php b/languages/classes/LanguageUk.php
index dbebb134..4b1dfde5 100644
--- a/languages/classes/LanguageUk.php
+++ b/languages/classes/LanguageUk.php
@@ -62,6 +62,8 @@ class LanguageUk extends Language {
//if no number with word, then use $form[0] for singular and $form[1] for plural or zero
if( count($forms) === 2 ) return $count == 1 ? $forms[0] : $forms[1];
+ // FIXME: CLDR defines 4 plural forms. Form for decimals is missing.
+ // See http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html#uk
$forms = $this->preConvertPlural( $forms, 3 );
if ($count > 10 && floor(($count % 100) / 10) == 1) {
diff --git a/languages/classes/LanguageWa.php b/languages/classes/LanguageWa.php
index d97b2026..4a4296ca 100644
--- a/languages/classes/LanguageWa.php
+++ b/languages/classes/LanguageWa.php
@@ -9,14 +9,22 @@
# k' i gn a.
class LanguageWa extends Language {
+ /**
+ * Use singular form for zero: $forms[0] when $count <= 1, $forms[1] otherwise; '' when $forms is empty.
+ */
+ function convertPlural( $count, $forms ) {
+ if ( !count($forms) ) { return ''; }
+ $forms = $this->preConvertPlural( $forms, 2 ); // helper not shown here; presumably normalizes $forms to two entries - confirm
+
+ return ($count <= 1) ? $forms[0] : $forms[1];
+ }
+
###
### Dates in Walloon are "1î d' <monthname>" for 1st of the month,
### "<day> di <monthname>" for months starting by a consoun, and
### "<day> d' <monthname>" for months starting with a vowel
###
function date( $ts, $adj = false, $format = true, $tc = false ) {
- global $wgUser;
-
if ( $adj ) { $ts = $this->userAdjust( $ts, $tc ); }
$datePreference = $this->dateFormat( $format );
diff --git a/languages/classes/LanguageYue.php b/languages/classes/LanguageYue.php
index fc7f233c..6581d788 100644
--- a/languages/classes/LanguageYue.php
+++ b/languages/classes/LanguageYue.php
@@ -3,19 +3,30 @@
* @ingroup Language
*/
class LanguageYue extends Language {
- function stripForSearch( $string ) {
+ function hasWordBreaks() {
+ return false;
+ }
+
+ /**
+ * Eventually this should be a word segmentation;
+ * for now just treat each character as a word.
+ * @todo Fixme: only do this for Han characters...
+ */
+ function wordSegmentation( $string ) {
+ $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";
+ $s = self::insertSpace( $string, $reg );
+ return $s;
+ }
+
+ function normalizeForSearch( $string ) {
wfProfileIn( __METHOD__ );
- // eventually this should be a word segmentation
- // for now just treat each character as a word
- // @fixme only do this for Han characters...
- $t = preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
- " $1", $string);
+ // Double-width roman characters
+ $s = self::convertDoubleWidth( $string );
+ $s = trim( $s );
+ $s = parent::normalizeForSearch( $s );
- // Do general case folding and UTF-8 armoring
- $t = parent::stripForSearch( $t );
wfProfileOut( __METHOD__ );
- return $t;
+ return $s;
}
}
diff --git a/languages/classes/LanguageZh.php b/languages/classes/LanguageZh.php
index cbb748c3..4a73c665 100644
--- a/languages/classes/LanguageZh.php
+++ b/languages/classes/LanguageZh.php
@@ -11,7 +11,6 @@ class ZhConverter extends LanguageConverter {
function __construct($langobj, $maincode,
$variants=array(),
$variantfallbacks=array(),
- $markup=array(),
$flags = array(),
$manualLevel = array() ) {
$this->mDescCodeSep = ':';
@@ -19,7 +18,6 @@ class ZhConverter extends LanguageConverter {
parent::__construct($langobj, $maincode,
$variants,
$variantfallbacks,
- $markup,
$flags,
$manualLevel);
$names = array(
@@ -133,6 +131,7 @@ class LanguageZh extends LanguageZh_hans {
parent::__construct();
$variants = array('zh','zh-hans','zh-hant','zh-cn','zh-hk','zh-mo','zh-my','zh-sg','zh-tw');
+
$variantfallbacks = array(
'zh' => array('zh-hans','zh-hant','zh-cn','zh-tw','zh-hk','zh-sg','zh-mo','zh-my'),
'zh-hans' => array('zh-cn','zh-sg','zh-my'),
@@ -152,7 +151,7 @@ class LanguageZh extends LanguageZh_hans {
$this->mConverter = new ZhConverter( $this, 'zh',
$variants, $variantfallbacks,
- array(),array(),
+ array(),
$ml);
$wgHooks['ArticleSaveComplete'][] = $this->mConverter;
@@ -171,31 +170,31 @@ class LanguageZh extends LanguageZh_hans {
"\"$1\"", $text);
}
- // word segmentation
- function stripForSearch( $string ) {
+ /**
+ * auto convert to zh-hans and normalize special characters.
+ *
+ * @param $string String
+ * @param $autoVariant String, default to 'zh-hans'
+ * @return String
+ */
+ function normalizeForSearch( $string, $autoVariant = 'zh-hans' ) {
wfProfileIn( __METHOD__ );
- // eventually this should be a word segmentation
- // for now just treat each character as a word
- // @fixme only do this for Han characters...
- $t = preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/",
- " $1", $string);
-
- //always convert to zh-hans before indexing. it should be
- //better to use zh-hans for search, since conversion from
- //Traditional to Simplified is less ambiguous than the
- //other way around
-
- $t = $this->mConverter->autoConvert($t, 'zh-hans');
- $t = parent::stripForSearch( $t );
+ // always convert to zh-hans before indexing. it should be
+ // better to use zh-hans for search, since conversion from
+ // Traditional to Simplified is less ambiguous than the
+ // other way around
+ $s = $this->mConverter->autoConvert( $string, $autoVariant );
+ // LanguageZh_hans::normalizeForSearch
+ $s = parent::normalizeForSearch( $s );
wfProfileOut( __METHOD__ );
- return $t;
+ return $s;
}
function convertForSearchResult( $termsArray ) {
$terms = implode( '|', $termsArray );
+ $terms = self::convertDoubleWidth( $terms );
$terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
$ret = array_unique( explode('|', $terms) );
return $ret;
diff --git a/languages/classes/LanguageZh_hans.php b/languages/classes/LanguageZh_hans.php
index 983dd485..5b03d731 100644
--- a/languages/classes/LanguageZh_hans.php
+++ b/languages/classes/LanguageZh_hans.php
@@ -4,21 +4,30 @@
* @ingroup Language
*/
class LanguageZh_hans extends Language {
- function stripForSearch( $string ) {
- # MySQL fulltext index doesn't grok utf-8, so we
- # need to fold cases and convert to hex
- # we also separate characters as "words"
- if( function_exists( 'mb_strtolower' ) ) {
- return preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
- "' U8' . bin2hex( \"$1\" )",
- mb_strtolower( $string ) );
- } else {
- list( , $wikiLowerChars ) = Language::getCaseMaps();
- return preg_replace(
- "/([\\xc0-\\xff][\\x80-\\xbf]*)/e",
- "' U8' . bin2hex( strtr( \"\$1\", \$wikiLowerChars ) )",
- $string );
- }
+ function hasWordBreaks() {
+ return false;
}
-}
+
+ /**
+ * Eventually this should be a word segmentation;
+ * for now just treat each character as a word.
+ * @todo Fixme: only do this for Han characters...
+ */
+ function wordSegmentation( $string ) {
+ $reg = "/([\\xc0-\\xff][\\x80-\\xbf]*)/";
+ $s = self::insertSpace( $string, $reg );
+ return $s;
+ }
+
+ function normalizeForSearch( $string ) {
+ wfProfileIn( __METHOD__ );
+
+ // Double-width roman characters
+ $s = self::convertDoubleWidth( $string );
+ $s = trim( $s );
+ $s = parent::normalizeForSearch( $s );
+
+ wfProfileOut( __METHOD__ );
+ return $s;
+ }
+} \ No newline at end of file