summaryrefslogtreecommitdiff
path: root/tests/phpunit/includes/HtmlFormatterTest.php
blob: 9dbfa452c8b09897a568e26834352aa66070cc59 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
<?php

/**
 * @group HtmlFormatter
 */
class HtmlFormatterTest extends MediaWikiTestCase {
	/**
	 * @dataProvider getHtmlData
	 *
	 * @param string $input
	 * @param string $expectedText
	 * @param array $expectedRemoved
	 * @param callable|bool $callback
	 */
	public function testTransform( $input, $expectedText,
		$expectedRemoved = array(), $callback = false
	) {
		$input = self::normalize( $input );
		$formatter = new HtmlFormatter( HtmlFormatter::wrapHTML( $input ) );
		if ( $callback ) {
			$callback( $formatter );
		}
		$removedElements = $formatter->filterContent();
		$html = $formatter->getText();
		$removed = array();
		foreach ( $removedElements as $removedElement ) {
			$removed[] = self::normalize( $formatter->getText( $removedElement ) );
		}
		$expectedRemoved = array_map( 'self::normalize', $expectedRemoved );

		$this->assertValidHtmlSnippet( $html );
		$this->assertEquals( self::normalize( $expectedText ), self::normalize( $html ) );
		$this->assertEquals( asort( $expectedRemoved ), asort( $removed ) );
	}

	private static function normalize( $s ) {
		return str_replace( "\n", '',
			str_replace( "\r", '', $s ) // "yay" to Windows!
		);
	}

	public function getHtmlData() {
		$removeImages = function ( HtmlFormatter $f ) {
			$f->setRemoveMedia();
		};
		$removeTags = function ( HtmlFormatter $f ) {
			$f->remove( array( 'table', '.foo', '#bar', 'div.baz' ) );
		};
		$flattenSomeStuff = function ( HtmlFormatter $f ) {
			$f->flatten( array( 's', 'div' ) );
		};
		$flattenEverything = function ( HtmlFormatter $f ) {
			$f->flattenAllTags();
		};
		return array(
			// remove images if asked
			array(
				'<img src="/foo/bar.jpg" alt="Blah"/>',
				'',
				array( '<img src="/foo/bar.jpg" alt="Blah">' ),
				$removeImages,
			),
			// basic tag removal
			array(
				// @codingStandardsIgnoreStart Ignore long line warnings.
				'<table><tr><td>foo</td></tr></table><div class="foo">foo</div><div class="foo quux">foo</div><span id="bar">bar</span>
<strong class="foo" id="bar">foobar</strong><div class="notfoo">test</div><div class="baz"/>
<span class="baz">baz</span>',
				// @codingStandardsIgnoreEnd
				'<div class="notfoo">test</div>
<span class="baz">baz</span>',
				array(
					'<table><tr><td>foo</td></tr></table>',
					'<div class="foo">foo</div>',
					'<div class="foo quux">foo</div>',
					'<span id="bar">bar</span>',
					'<strong class="foo" id="bar">foobar</strong>',
					'<div class="baz"/>',
				),
				$removeTags,
			),
			// don't flatten tags that start like chosen ones
			array(
				'<div><s>foo</s> <span>bar</span></div>',
				'foo <span>bar</span>',
				array(),
				$flattenSomeStuff,
			),
			// total flattening
			array(
				'<div style="foo">bar<sup>2</sup></div>',
				'bar2',
				array(),
				$flattenEverything,
			),
			// UTF-8 preservation and security
			array(
				'<span title="&quot; \' &amp;">&lt;Тест!&gt;</span> &amp;&lt;&#38;&#0038;&#x26;&#x026;',
				'<span title="&quot; \' &amp;">&lt;Тест!&gt;</span> &amp;&lt;&amp;&amp;&amp;&amp;',
				array(),
				$removeTags, // Have some rules to trigger a DOM parse
			),
			// https://bugzilla.wikimedia.org/show_bug.cgi?id=53086
			array(
				'Foo<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>'
					. ' <a href="/wiki/Bar" title="Bar" class="mw-redirect">Bar</a>',
				'Foo<sup id="cite_ref-1" class="reference"><a href="#cite_note-1">[1]</a></sup>'
					. ' <a href="/wiki/Bar" title="Bar" class="mw-redirect">Bar</a>',
			),
		);
	}

	public function testQuickProcessing() {
		$f = new MockHtmlFormatter( 'foo' );
		$f->filterContent();
		$this->assertFalse( $f->hasDoc, 'HtmlFormatter should not needlessly parse HTML' );
	}
}

class MockHtmlFormatter extends HtmlFormatter {
	public $hasDoc = false;

	public function getDoc() {
		$this->hasDoc = true;
		return parent::getDoc();
	}
}