update to MediaWiki 1.17.0

author: Pierre Schmitz <pierre@archlinux.de> 2011-06-22 11:28:20 +0200
committer: Pierre Schmitz <pierre@archlinux.de> 2011-06-22 11:28:20 +0200
commit: 9db190c7e736ec8d063187d4241b59feaf7dc2d1 (patch)
tree: 46d1a0dee7febef5c2d57a9f7b972be16a163b3d /includes/normal/UtfNormalTest2.php
parent: 78677c7bbdcc9739f6c10c75935898a20e1acd9e (diff)
1 files changed, 239 insertions, 0 deletions
diff --git a/includes/normal/UtfNormalTest2.php b/includes/normal/UtfNormalTest2.php
new file mode 100644
index 00000000..fafd5475
--- /dev/null
+++ b/includes/normal/UtfNormalTest2.php
@@ -0,0 +1,239 @@
+#!/usr/bin/php
+<?php
+/**
+ * Other tests for the unicode normalization module
+ *
+ * @file
+ * @ingroup UtfNormal
+ */
+
+if( php_sapi_name() != 'cli' ) {
+	die( "Run me from the command line please.\n" );
+}
+
+// From http://unicode.org/Public/UNIDATA/NormalizationTest.txt
+$file = "NormalizationTest.txt";
+
+// Anything after this character is a comment
+define ( 'COMMENT', '#' );
+
+// Semicolons are used to separate the columns
+define ( 'SEPARATOR', ';' );
+
+$f = fopen($file, "r");
+
+/**
+ * The following section will be used for testing different normalization methods.
+ * - Pure PHP
+     ~ no assertion errors
+     ~ 6.25 minutes
+
+ * - php_utfnormal.so or intl extension: both are wrappers around
+     libicu so we list the version of libicu when making the
+     comparison
+
+ * - libicu Ubuntu 3.8.1-3ubuntu1.1 php 5.2.6-3ubuntu4.5
+     ~ 2200 assertion errors
+     ~ 5 seconds
+	 ~ output: http://paste2.org/p/921566
+
+ * - libicu Ubuntu 4.2.1-3 php 5.3.2-1ubuntu4.2
+     ~ 1384 assertion errors
+	 ~ 15 seconds
+	 ~ output: http://paste2.org/p/921435
+
+ * - libicu Debian 4.4.1-5 php 5.3.2-1ubuntu4.2
+     ~ no assertion errors
+	 ~ 13 seconds
+
+ * - Tests comparing pure PHP output with libicu output were added
+     later and slow down the runtime.
+ */
+
+require_once("./UtfNormal.php");
+function normalize_form_c($c)      { return UtfNormal::toNFC($c);  }
+function normalize_form_d($c)      { return UtfNormal::toNFD($c);  }
+function normalize_form_kc($c)     { return UtfNormal::toNFKC($c); }
+function normalize_form_kd($c)     { return UtfNormal::toNFKD($c); }
+
+/**
+ * This set of functions is only useful if youve added a param to the
+ * following functions to force pure PHP usage.  I decided not to
+ * commit that code since might produce a slowdown in the UTF
+ * normalization code just for the sake of these tests. -- hexmode
+ */
+function normalize_form_c_php($c)  { return UtfNormal::toNFC($c, "php");  }
+function normalize_form_d_php($c)  { return UtfNormal::toNFD($c, "php");  }
+function normalize_form_kc_php($c) { return UtfNormal::toNFKC($c, "php"); }
+function normalize_form_kd_php($c) { return UtfNormal::toNFKD($c, "php"); }
+
+assert_options(ASSERT_ACTIVE, 1);
+assert_options(ASSERT_WARNING, 0);
+assert_options(ASSERT_QUIET_EVAL, 1);
+assert_options(ASSERT_CALLBACK, 'my_assert');
+
+function my_assert( $file, $line, $code ) {
+	global $col, $lineNo;
+	echo "Assertion that '$code' failed on line $lineNo ($col[5])\n";
+}
+
+$count = 0;
+$lineNo = 0;
+if( $f !== false ) {
+	while( ( $col = getRow( $f ) ) !== false ) {
+		$lineNo++;
+
+		if(count($col) == 6) {
+			$count++;
+			if( $count % 100 === 0 ) echo "Count: $count\n";
+		} else {
+			continue;
+		}
+
+		# verify that the pure PHP version is correct
+		$NFCc1  = normalize_form_c($col[0]);
+		$NFCc1p = normalize_form_c_php($col[0]);
+		assert('$NFCc1 === $NFCc1p');
+		$NFCc2  = normalize_form_c($col[1]);
+		$NFCc2p = normalize_form_c_php($col[1]);
+		assert('$NFCc2 === $NFCc2p');
+		$NFCc3  = normalize_form_c($col[2]);
+		$NFCc3p = normalize_form_c_php($col[2]);
+		assert('$NFCc3 === $NFCc3p');
+		$NFCc4  = normalize_form_c($col[3]);
+		$NFCc4p = normalize_form_c_php($col[3]);
+		assert('$NFCc4 === $NFCc4p');
+		$NFCc5  = normalize_form_c($col[4]);
+		$NFCc5p = normalize_form_c_php($col[4]);
+		assert('$NFCc5 === $NFCc5p');
+
+		$NFDc1  = normalize_form_d($col[0]);
+		$NFDc1p = normalize_form_d_php($col[0]);
+		assert('$NFDc1 === $NFDc1p');
+		$NFDc2  = normalize_form_d($col[1]);
+		$NFDc2p = normalize_form_d_php($col[1]);
+		assert('$NFDc2 === $NFDc2p');
+		$NFDc3  = normalize_form_d($col[2]);
+		$NFDc3p = normalize_form_d_php($col[2]);
+		assert('$NFDc3 === $NFDc3p');
+		$NFDc4  = normalize_form_d($col[3]);
+		$NFDc4p = normalize_form_d_php($col[3]);
+		assert('$NFDc4 === $NFDc4p');
+		$NFDc5  = normalize_form_d($col[4]);
+		$NFDc5p = normalize_form_d_php($col[4]);
+		assert('$NFDc5 === $NFDc5p');
+
+		$NFKDc1  = normalize_form_kd($col[0]);
+		$NFKDc1p = normalize_form_kd_php($col[0]);
+		assert('$NFKDc1 === $NFKDc1p');
+		$NFKDc2  = normalize_form_kd($col[1]);
+		$NFKDc2p = normalize_form_kd_php($col[1]);
+		assert('$NFKDc2 === $NFKDc2p');
+		$NFKDc3  = normalize_form_kd($col[2]);
+		$NFKDc3p = normalize_form_kd_php($col[2]);
+		assert('$NFKDc3 === $NFKDc3p');
+		$NFKDc4  = normalize_form_kd($col[3]);
+		$NFKDc4p = normalize_form_kd_php($col[3]);
+		assert('$NFKDc4 === $NFKDc4p');
+		$NFKDc5  = normalize_form_kd($col[4]);
+		$NFKDc5p = normalize_form_kd_php($col[4]);
+		assert('$NFKDc5 === $NFKDc5p');
+
+		$NFKCc1  = normalize_form_kc($col[0]);
+		$NFKCc1p = normalize_form_kc_php($col[0]);
+		assert('$NFKCc1 === $NFKCc1p');
+		$NFKCc2  = normalize_form_kc($col[1]);
+		$NFKCc2p = normalize_form_kc_php($col[1]);
+		assert('$NFKCc2 === $NFKCc2p');
+		$NFKCc3  = normalize_form_kc($col[2]);
+		$NFKCc3p = normalize_form_kc_php($col[2]);
+		assert('$NFKCc3 === $NFKCc3p');
+		$NFKCc4  = normalize_form_kc($col[3]);
+		$NFKCc4p = normalize_form_kc_php($col[3]);
+		assert('$NFKCc4 === $NFKCc4p');
+		$NFKCc5  = normalize_form_kc($col[4]);
+		$NFKCc5p = normalize_form_kc_php($col[4]);
+		assert('$NFKCc5 === $NFKCc5p');
+
+		# c2 ==	 NFC(c1) ==	 NFC(c2) ==	 NFC(c3)
+		assert('$col[1] === $NFCc1');
+		assert('$col[1] === $NFCc2');
+		assert('$col[1] === $NFCc3');
+
+		# c4 ==	 NFC(c4) ==	 NFC(c5)
+		assert('$col[3] === $NFCc4');
+		assert('$col[3] === $NFCc5');
+
+		# c3 ==	 NFD(c1) ==	 NFD(c2) ==	 NFD(c3)
+		assert('$col[2] === $NFDc1');
+		assert('$col[2] === $NFDc2');
+		assert('$col[2] === $NFDc3');
+
+		# c5 ==	 NFD(c4) ==	 NFD(c5)
+		assert('$col[4] === $NFDc4');
+		assert('$col[4] === $NFDc5');
+
+		# c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
+		assert('$col[3] === $NFKCc1');
+		assert('$col[3] === $NFKCc2');
+		assert('$col[3] === $NFKCc3');
+		assert('$col[3] === $NFKCc4');
+		assert('$col[3] === $NFKCc5');
+
+		# c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
+		assert('$col[4] === $NFKDc1');
+		assert('$col[4] === $NFKDc2');
+		assert('$col[4] === $NFKDc3');
+		assert('$col[4] === $NFKDc4');
+		assert('$col[4] === $NFKDc5');
+	}
+}
+echo "done.\n";
+
+// Compare against http://en.wikipedia.org/wiki/UTF-8#Description
+function unichr($c) {
+	if ($c <= 0x7F) {
+		return chr($c);
+	} else if ($c <= 0x7FF) {
+		return chr(0xC0 | $c >> 6) . chr(0x80 | $c & 0x3F);
+	} else if ($c <= 0xFFFF) {
+		return chr(0xE0 | $c >> 12) . chr(0x80 | $c >> 6 & 0x3F)
+			. chr(0x80 | $c & 0x3F);
+	} else if ($c <= 0x10FFFF) {
+		return chr(0xF0 | $c >> 18) . chr(0x80 | $c >> 12 & 0x3F)
+			. chr(0x80 | $c >> 6 & 0x3F)
+			. chr(0x80 | $c & 0x3F);
+	} else {
+		return false;
+	}
+}
+
+function unistr($c) {
+	return implode("", array_map("unichr", array_map("hexdec", explode(" ", $c))));
+}
+
+function getRow( $f ) {
+	$row = fgets( $f );
+	if( $row === false ) return false;
+	$row = rtrim($row);
+	$pos = strpos( $row, COMMENT );
+	$pos2 = strpos( $row, ")" );
+	if( $pos === 0 ) return array($row);
+	$c = "";
+
+	if( $pos ) {
+		if($pos2) $c = substr( $row, $pos2 + 2 );
+		else	  $c = substr( $row, $pos );
+		$row = substr( $row, 0, $pos );
+	}
+
+	$ret = array();
+	foreach( explode( SEPARATOR, $row ) as $ent ) {
+		if( trim( $ent ) !== "" ) {
+			$ret[] = unistr($ent);
+		}
+	}
+	$ret[] = $c;
+
+	return $ret;
+}
author	Pierre Schmitz <pierre@archlinux.de>	2011-06-22 11:28:20 +0200
committer	Pierre Schmitz <pierre@archlinux.de>	2011-06-22 11:28:20 +0200
commit	9db190c7e736ec8d063187d4241b59feaf7dc2d1 (patch)
tree	46d1a0dee7febef5c2d57a9f7b972be16a163b3d /includes/normal/UtfNormalTest2.php
parent	78677c7bbdcc9739f6c10c75935898a20e1acd9e (diff)