1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
<?php
/**
 * Tests for collation support: checks the prefix behaviour of binary sort
 * keys produced by the intl Collator, and the first-letter detection of the
 * Collation classes.
 */
class CollationTest extends MediaWikiLangTestCase {
	/**
	 * Skip every test in this class when the intl extension is not loaded,
	 * since the Collator class used below is provided by that extension.
	 */
	protected function setUp() {
		parent::setUp();
		if ( !extension_loaded( 'intl' ) ) {
			$this->markTestSkipped( 'These tests require intl extension' );
		}
	}

	/**
	 * Compute the primary-strength binary sort keys shared by the prefix tests.
	 *
	 * @param $lang String Language code for collator
	 * @param $base String Base string
	 * @param $extended String String to compare the base against
	 * @return Array Two elements: the sort key of $base with trailing NUL
	 *  bytes stripped, followed by the unmodified sort key of $extended.
	 */
	private function getPrimarySortKeys( $lang, $base, $extended ) {
		$cp = Collator::create( $lang );
		// Collator::create() returns null on failure; report that as a test
		// failure rather than a fatal "member function on null" error.
		$this->assertNotNull( $cp, "Could not create a collator for language $lang" );
		$cp->setStrength( Collator::PRIMARY );

		// Remove sortkey terminator, otherwise the base key could never be
		// a prefix of a longer key.
		$baseBin = rtrim( $cp->getSortKey( $base ), "\0" );
		$extendedBin = $cp->getSortKey( $extended );

		return array( $baseBin, $extendedBin );
	}

	/**
	 * Test to make sure, that if you
	 * have "X" and "XY", the binary
	 * sortkey also has "X" being a
	 * prefix of "XY". Our collation
	 * code makes this assumption.
	 *
	 * @param $lang String Language code for collator
	 * @param $base String Base string
	 * @param $extended String String containing base as a prefix.
	 *
	 * @dataProvider prefixDataProvider
	 */
	public function testIsPrefix( $lang, $base, $extended ) {
		list( $baseBin, $extendedBin ) = $this->getPrimarySortKeys( $lang, $base, $extended );
		$this->assertStringStartsWith( $baseBin, $extendedBin, "$base is not a prefix of $extended" );
	}

	/**
	 * Data for testIsPrefix.
	 *
	 * @return Array List of array( language code, base string, extended string )
	 *  where the sort key of the base is expected to prefix that of the extended string.
	 */
	public static function prefixDataProvider() {
		return array(
			array( 'en', 'A', 'AA' ),
			array( 'en', 'A', 'AAA' ),
			array( 'en', 'Д', 'ДЂ' ),
			array( 'en', 'Д', 'ДA' ),
			// 'Ʒ' should expand to 'Z ' (note space).
			array( 'fi', 'Z', 'Ʒ' ),
			// 'Þ' should expand to 'th'
			array( 'sv', 't', 'Þ' ),
			// Javanese is a limited use alphabet, so should have 3 bytes
			// per character, so do some tests with it.
			array( 'en', 'ꦲ', 'ꦲꦤ' ),
			array( 'en', 'ꦲ', 'ꦲД' ),
			array( 'en', 'A', 'Aꦲ' ),
		);
	}

	/**
	 * Opposite of testIsPrefix: the binary sortkey of the base string
	 * must NOT be a prefix of the sortkey of the other string.
	 *
	 * @param $lang String Language code for collator
	 * @param $base String Base string
	 * @param $extended String String whose sortkey must not start with the base's sortkey.
	 *
	 * @dataProvider notPrefixDataProvider
	 */
	public function testNotIsPrefix( $lang, $base, $extended ) {
		list( $baseBin, $extendedBin ) = $this->getPrimarySortKeys( $lang, $base, $extended );
		$this->assertStringStartsNotWith( $baseBin, $extendedBin, "$base is a prefix of $extended" );
	}

	/**
	 * Data for testNotIsPrefix.
	 *
	 * @return Array List of array( language code, base string, other string )
	 *  where the sort key of the base is expected not to prefix that of the other string.
	 */
	public static function notPrefixDataProvider() {
		return array(
			array( 'en', 'A', 'B' ),
			array( 'en', 'AC', 'ABC' ),
			array( 'en', 'Z', 'Ʒ' ),
			array( 'en', 'A', 'ꦲ' ),
		);
	}

	/**
	 * Test correct first letter is fetched.
	 *
	 * @param $collation String Collation name (aka uca-en)
	 * @param $string String String to get first letter of
	 * @param $firstLetter String Expected first letter.
	 *
	 * @dataProvider firstLetterProvider
	 */
	public function testGetFirstLetter( $collation, $string, $firstLetter ) {
		$col = Collation::factory( $collation );
		$this->assertEquals(
			$firstLetter,
			$col->getFirstLetter( $string ),
			"Wrong first letter for '$string' using collation $collation"
		);
	}

	/**
	 * Data for testGetFirstLetter.
	 *
	 * @return Array List of array( collation name, input string, expected first letter )
	 */
	public static function firstLetterProvider() {
		return array(
			array( 'uppercase', 'Abc', 'A' ),
			array( 'uppercase', 'abc', 'A' ),
			array( 'identity', 'abc', 'a' ),
			array( 'uca-en', 'abc', 'A' ),
			array( 'uca-en', ' ', ' ' ),
			array( 'uca-en', 'Êveryone', 'E' ),
			array( 'uca-vi', 'Êveryone', 'Ê' ),
			// Make sure thorn is not a first letter.
			array( 'uca-sv', 'The', 'T' ),
			array( 'uca-sv', 'Å', 'Å' ),
			array( 'uca-hu', 'dzsdo', 'Dzs' ),
			array( 'uca-hu', 'dzdso', 'Dz' ),
			array( 'uca-hu', 'CSD', 'Cs' ),
			array( 'uca-root', 'CSD', 'C' ),
			array( 'uca-fi', 'Ǥ', 'G' ),
			array( 'uca-fi', 'Ŧ', 'T' ),
			array( 'uca-fi', 'Ʒ', 'Z' ),
			array( 'uca-fi', 'Ŋ', 'N' ),
		);
	}
}
|