1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
<?php
/**
 * Tests for HtmlFormatter: element removal via filterContent(), tag
 * flattening, and the text returned by getText().
 *
 * @group HtmlFormatter
 */
class HtmlFormatterTest extends MediaWikiTestCase {
	/**
	 * Runs one data set through HtmlFormatter and checks both the remaining
	 * HTML and the list of elements reported as removed.
	 *
	 * The optional callback receives the formatter before filterContent() is
	 * called, so that each data set can configure its own removal/flattening
	 * rules.
	 *
	 * NOTE(review): the removed-element lists are now genuinely compared (the
	 * previous assertion compared the boolean results of asort() and could
	 * never fail). Expected entries must therefore match the formatter's
	 * serialization of each removed element exactly - confirm the fixtures in
	 * getHtmlData() against a real run.
	 *
	 * @dataProvider getHtmlData
	 *
	 * @param string $input HTML snippet fed to the formatter
	 * @param string $expectedText HTML expected from getText() after filtering
	 * @param array $expectedRemoved HTML of every element expected to be removed
	 * @param callable|bool $callback Configures the formatter, or false for none
	 */
	public function testTransform( $input, $expectedText,
		$expectedRemoved = array(), $callback = false
	) {
		$input = self::normalize( $input );
		$formatter = new HtmlFormatter( HtmlFormatter::wrapHTML( $input ) );
		if ( $callback ) {
			$callback( $formatter );
		}
		$removedElements = $formatter->filterContent();
		$html = $formatter->getText();
		$removed = array();
		foreach ( $removedElements as $removedElement ) {
			$removed[] = self::normalize( $formatter->getText( $removedElement ) );
		}
		// Array callable instead of the 'self::normalize' string form, which
		// is deprecated as of PHP 8.2.
		$expectedRemoved = array_map( array( __CLASS__, 'normalize' ), $expectedRemoved );

		$this->assertValidHtmlSnippet( $html );
		$this->assertEquals( self::normalize( $expectedText ), self::normalize( $html ) );

		// Removal order is not part of the contract being tested, so compare
		// the two lists after sorting. sort() (not asort()) is used so that
		// keys are renumbered and the arrays compare position by position.
		sort( $expectedRemoved );
		sort( $removed );
		$this->assertEquals( $expectedRemoved, $removed );
	}

	/**
	 * Strips all carriage returns and line feeds from a string so that
	 * comparisons do not depend on line-ending style or on how the fixtures
	 * are wrapped in this file.
	 *
	 * @param string $s
	 * @return string
	 */
	private static function normalize( $s ) {
		// "yay" to Windows!
		return str_replace( array( "\r", "\n" ), '', $s );
	}

	/**
	 * Data provider for testTransform().
	 *
	 * Each entry is: input HTML, expected output HTML, and optionally the
	 * list of expected removed elements and a callback that configures the
	 * formatter.
	 *
	 * @return array
	 */
	public function getHtmlData() {
		$removeImages = function ( HtmlFormatter $f ) {
			$f->setRemoveMedia();
		};
		$removeTags = function ( HtmlFormatter $f ) {
			$f->remove( array( 'table', '.foo', '#bar', 'div.baz' ) );
		};
		$flattenSomeStuff = function ( HtmlFormatter $f ) {
			$f->flatten( array( 's', 'div' ) );
		};
		$flattenEverything = function ( HtmlFormatter $f ) {
			$f->flattenAllTags();
		};

		return array(
			// remove images if asked
			array(
				'<img src="/foo/bar.jpg" alt="Blah"/>',
				'',
				array( '<img src="/foo/bar.jpg" alt="Blah">' ),
				$removeImages,
			),
			// basic tag removal
			array(
				// @codingStandardsIgnoreStart Ignore long line warnings.
				'<table><tr><td>foo</td></tr></table><div class="foo">foo</div><div class="foo quux">foo</div><span id="bar">bar</span>
<strong class="foo" id="bar">foobar</strong><div class="notfoo">test</div><div class="baz"/>
<span class="baz">baz</span>',
				// @codingStandardsIgnoreEnd
				'<div class="notfoo">test</div>
<span class="baz">baz</span>',
				array(
					'<table><tr><td>foo</td></tr></table>',
					'<div class="foo">foo</div>',
					'<div class="foo quux">foo</div>',
					'<span id="bar">bar</span>',
					'<strong class="foo" id="bar">foobar</strong>',
					'<div class="baz"/>',
				),
				$removeTags,
			),
			// don't flatten tags that start like chosen ones
			array(
				'<div><s>foo</s> <span>bar</span></div>',
				'foo <span>bar</span>',
				array(),
				$flattenSomeStuff,
			),
			// total flattening
			array(
				'<div style="foo">bar<sup>2</sup></div>',
				'bar2',
				array(),
				$flattenEverything,
			),
			// UTF-8 preservation and security
			// NOTE(review): input and expected output below are identical and
			// contain raw <, & and " characters; HTML entity escapes may have
			// been lost from these literals - confirm against the intended fixture.
			array(
				'<span title="" \' &"><Тест!></span> &<&&&&',
				'<span title="" \' &"><Тест!></span> &<&&&&',
				array(),
				$removeTags, // Have some rules to trigger a DOM parse
			),
			// https://bugzilla.wikimedia.org/show_bug.cgi?id=53086
			array(
				'Foo<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>'
					. ' <a href="/wiki/Bar" title="Bar" class="mw-redirect">Bar</a>',
				'Foo<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>'
					. ' <a href="/wiki/Bar" title="Bar" class="mw-redirect">Bar</a>',
			),
		);
	}

	/**
	 * With no removal or flattening rules configured, filterContent() must
	 * not call getDoc(); MockHtmlFormatter records any such call in $hasDoc.
	 */
	public function testQuickProcessing() {
		$f = new MockHtmlFormatter( 'foo' );
		$f->filterContent();
		$this->assertFalse( $f->hasDoc, 'HtmlFormatter should not needlessly parse HTML' );
	}
}
/**
 * HtmlFormatter subclass that records whether getDoc() was ever called,
 * so tests can detect whether the document was requested.
 */
class MockHtmlFormatter extends HtmlFormatter {
	/** @var bool Set to true the first time getDoc() is invoked */
	public $hasDoc = false;

	/**
	 * Flags the call, then delegates to the parent implementation.
	 *
	 * @return mixed Whatever HtmlFormatter::getDoc() returns
	 */
	public function getDoc() {
		// Record the access before delegating, so the flag is set even if
		// the parent call does not return normally.
		$this->hasDoc = true;
		$document = parent::getDoc();

		return $document;
	}
}
|