diff options
Diffstat (limited to 'maintenance/language/generateCollationData.php')
-rw-r--r-- | maintenance/language/generateCollationData.php | 102 |
1 files changed, 82 insertions, 20 deletions
diff --git a/maintenance/language/generateCollationData.php b/maintenance/language/generateCollationData.php index e34d9a13..12823c0c 100644 --- a/maintenance/language/generateCollationData.php +++ b/maintenance/language/generateCollationData.php @@ -30,19 +30,19 @@ require_once( __DIR__ .'/../Maintenance.php' ); */ class GenerateCollationData extends Maintenance { /** The directory with source data files in it */ - var $dataDir; + public $dataDir; /** The primary weights, indexed by codepoint */ - var $weights; + public $weights; /** * A hashtable keyed by codepoint, where presence indicates that a character * has a decomposition mapping. This makes it non-preferred for group header * selection. */ - var $mappedChars; + public $mappedChars; - var $debugOutFile; + public $debugOutFile; /** * Important tertiary weights from UTS #10 section 7.2 @@ -61,18 +61,80 @@ class GenerateCollationData extends Maintenance { public function execute() { $this->dataDir = $this->getOption( 'data-dir', '.' ); - if ( !file_exists( "{$this->dataDir}/allkeys.txt" ) ) { - $this->error( "Unable to find allkeys.txt. Please download it from " . - "http://www.unicode.org/Public/UCA/latest/allkeys.txt and specify " . - "its location with --data-dir=<DIR>" ); - exit( 1 ); - } - if ( !file_exists( "{$this->dataDir}/ucd.all.grouped.xml" ) ) { - $this->error( "Unable to find ucd.all.grouped.xml. Please download it " . - "from http://www.unicode.org/Public/6.0.0/ucdxml/ucd.all.grouped.zip " . - "and specify its location with --data-dir=<DIR>" ); + + $allkeysPresent = file_exists( "{$this->dataDir}/allkeys.txt" ); + $ucdallPresent = file_exists( "{$this->dataDir}/ucd.all.grouped.xml" ); + + // As of January 2013, these links work for all versions of Unicode + // between 5.1 and 6.2, inclusive. + $allkeysURL = "http://www.unicode.org/Public/UCA/<Unicode version>/allkeys.txt"; + $ucdallURL = "http://www.unicode.org/Public/<Unicode version>/ucdxml/ucd.all.grouped.zip"; + + if ( !$allkeysPresent || !$ucdallPresent ) { + $icuVersion = IcuCollation::getICUVersion(); + $unicodeVersion = IcuCollation::getUnicodeVersionForICU(); + + $error = ""; + + if ( !$allkeysPresent ) { + $error .= "Unable to find allkeys.txt. " + . "Download it and specify its location with --data-dir=<DIR>. " + . "\n\n"; + } + if ( !$ucdallPresent ) { + $error .= "Unable to find ucd.all.grouped.xml. " + . "Download it, unzip, and specify its location with --data-dir=<DIR>. " + . "\n\n"; + } + + $versionKnown = false; + if ( !$icuVersion ) { + // Unknown version - either very old intl, + // or PHP < 5.3.7 which does not expose this information + $error .= "As MediaWiki could not determine the version of ICU library used by your PHP's " + . "intl extension it can't suggest which file version to download. " + . "This can be caused by running a very old version of intl or PHP < 5.3.7. " + . "If you are sure everything is all right, find out the ICU version " + . "by running phpinfo(), check what is the Unicode version it is using " + . "at http://site.icu-project.org/download, then try finding appropriate data file(s) at:"; + } elseif ( version_compare( $icuVersion, "4.0", "<" ) ) { + // Extra old version + $error .= "You are using outdated version of ICU ($icuVersion), intended for " + . ( $unicodeVersion ? "Unicode $unicodeVersion" : "an unknown version of Unicode" ) + . "; this file might not be avalaible for it, and it's not supported by MediaWiki. " + ." You are on your own; consider upgrading PHP's intl extension or try " + . "one of the files available at:"; + } elseif ( version_compare( $icuVersion, "51.0", ">=" ) ) { + // Extra recent version + $error .= "You are using ICU $icuVersion, released after this script was last updated. " + . "Check what is the Unicode version it is using at http://site.icu-project.org/download . " + . "It can't be guaranteed everything will work, but appropriate file(s) should " + . "be available at:"; + } else { + // ICU 4.0 to 50.x + $versionKnown = true; + $error .= "You are using ICU $icuVersion, intended for " + . ( $unicodeVersion ? "Unicode $unicodeVersion" : "an unknown version of Unicode" ) + . ". Appropriate file(s) should be available at:"; + } + $error .= "\n"; + + if ( $versionKnown && $unicodeVersion ) { + $allkeysURL = str_replace( "<Unicode version>", "$unicodeVersion.0", $allkeysURL ); + $ucdallURL = str_replace( "<Unicode version>", "$unicodeVersion.0", $ucdallURL ); + } + + if ( !$allkeysPresent ) { + $error .= "* $allkeysURL\n"; + } + if ( !$ucdallPresent ) { + $error .= "* $ucdallURL\n"; + } + + $this->error( $error ); exit( 1 ); } + $debugOutFileName = $this->getOption( 'debug-output' ); if ( $debugOutFileName ) { $this->debugOutFile = fopen( $debugOutFileName, 'w' ); @@ -285,12 +347,12 @@ class GenerateCollationData extends Maintenance { } class UcdXmlReader { - var $fileName; - var $callback; - var $groupAttrs; - var $xml; - var $blocks = array(); - var $currentBlock; + public $fileName; + public $callback; + public $groupAttrs; + public $xml; + public $blocks = array(); + public $currentBlock; function __construct( $fileName ) { $this->fileName = $fileName; |