aboutsummaryrefslogtreecommitdiffstats
path: root/tests/phpunit/includes/search/SearchUpdateTest.php
blob: b2b97293e5662d5f623b676426bd2bb2164dfb9f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
<?php

use MediaWiki\Page\PageIdentityValue;
use MediaWiki\Search\SearchUpdate;

/**
 * Tests for the text normalisation done by SearchUpdate::updateText().
 *
 * @group Search
 * @covers \MediaWiki\Search\SearchUpdate
 */
class SearchUpdateTest extends MediaWikiIntegrationTestCase {

	private SearchUpdate $su;

	protected function setUp(): void {
		parent::setUp();
		$pageIdentity = new PageIdentityValue( 42, NS_MAIN, 'Main_Page', PageIdentityValue::LOCAL );
		$this->su = new SearchUpdate( 0, $pageIdentity );
	}

	/**
	 * Run $text through SearchUpdate::updateText() and trim the result, so
	 * that assertions do not depend on leading or trailing whitespace.
	 *
	 * @param string $text Raw text to feed to the search updater
	 * @return string Trimmed output of SearchUpdate::updateText()
	 */
	public function updateText( $text ) {
		return trim( $this->su->updateText( $text ) );
	}

	public function testUpdateText() {
		$this->assertSame(
			'test',
			$this->updateText( '<div>TeSt</div>' ),
			'HTML stripped, text lowercased'
		);

		// The markup below is used exactly as given (including the mismatched
		// <tr>/</td> pairs); only the cell text is expected to survive.
		$tableHtml = <<<EOT
<table style="color:red; font-size:100px">
	<tr class="scary"><td><div>foo</div></td><tr>bar</td></tr>
	<tr><td>boz</td><tr>quux</td></tr>
</table>
EOT;
		$this->assertSame(
			'foo bar boz quux',
			$this->updateText( $tableHtml ),
			'Stripping HTML tables'
		);

		$this->assertSame(
			'a b',
			$this->updateText( 'a > b' ),
			'Handle unclosed tags'
		);

		// T20609: a 10000-byte input containing an unterminated "<barbarbar"
		// must not come back as an empty string.
		$text = str_pad( "foo <barbarbar \n", 10000, 'x' );

		$this->assertNotSame(
			'',
			$this->updateText( $text ),
			'T20609'
		);
	}

	/**
	 * T34712: Test if unicode quotes in article links make its search index empty
	 */
	public function testUnicodeLinkSearchIndexError() {
		$text = "text „http://example.com“ text";
		$result = $this->updateText( $text );

		// preg_replace() with the /u modifier returns null when the subject is
		// not valid UTF-8, so this no-op replacement doubles as a UTF-8 check.
		$processed = preg_replace( '/Q/u', 'Q', $result );

		$failureMessage = 'Link surrounded by unicode quotes should not fail UTF-8 validation';
		// Check the two failure modes separately so a failing run reports
		// which one occurred: invalid UTF-8 (null) or an empty index text.
		$this->assertNotNull( $processed, $failureMessage );
		$this->assertNotSame( '', $processed, $failureMessage );
	}
}