1 files changed, 40 insertions, 44 deletions
diff --git a/includes/content/WikiTextStructure.php b/includes/content/WikiTextStructure.php
index 003eb2761da4..1046a4329b89 100644
--- a/includes/content/WikiTextStructure.php
+++ b/includes/content/WikiTextStructure.php
@@ -5,30 +5,20 @@ use MediaWiki\Parser\ParserOutput;
 use MediaWiki\Parser\Sanitizer;
 
 /**
- * Class allowing to explore structure of parsed wikitext.
+ * Class allowing to explore the structure of parsed wikitext.
  */
 class WikiTextStructure {
-	/**
-	 * @var string
-	 */
-	private $openingText;
-	/**
-	 * @var string
-	 */
-	private $allText;
-	/**
-	 * @var string[]
-	 */
-	private $auxText = [];
-	/**
-	 * @var ParserOutput
-	 */
-	private $parserOutput;
+
+	private ?string $openingText = null;
+	private ?string $allText = null;
+	/** @var string[] */
+	private array $auxText = [];
+	private ParserOutput $parserOutput;
 
 	/**
-	 * @var string[] selectors to elements that are excluded entirely from search
+	 * Selectors to elements that are excluded entirely from search
 	 */
-	private $excludedElementSelectors = [
+	private const EXCLUDED_ELEMENT_SELECTORS = [
 		// "it looks like you don't have javascript enabled..." – do not need to index
 		'audio', 'video',
 		// CSS stylesheets aren't content
@@ -39,7 +29,7 @@ class WikiTextStructure {
 		'.mw-cite-backlink',
 		// Headings are already indexed in their own field.
 		'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
-		// Collapsed fields are hidden by default so we don't want them showing up.
+		// Collapsed fields are hidden by default, so we don't want them showing up.
 		'.autocollapse',
 		// Content explicitly decided to be not searchable by editors such
 		// as custom navigation templates.
@@ -49,9 +39,9 @@ class WikiTextStructure {
 	];
 
 	/**
-	 * @var string[] selectors to elements that are considered auxiliary to article text for search
+	 * Selectors to elements that are considered auxiliary to the article text for search
 	 */
-	private $auxiliaryElementSelectors = [
+	private const AUXILIARY_ELEMENT_SELECTORS = [
 		// Thumbnail captions aren't really part of the text proper
 		'.thumbcaption',
 		'figcaption',
@@ -73,15 +63,18 @@ class WikiTextStructure {
 	}
 
 	/**
-	 * Get headings on the page.
+	 * Gets headings from the page.
 	 * @return string[]
 	 * First strip out things that look like references.  We can't use HTML filtering because
 	 * the references come back as <sup> tags without a class.  To keep from breaking stuff like
 	 *  ==Applicability of the strict mass–energy equivalence formula, ''E'' = ''mc''<sup>2</sup>==
-	 * we don't remove the whole <sup> tag.  We also don't want to strip the <sup> tag and remove
-	 * everything that looks like [2] because, I dunno, maybe there is a band named Word [2] Foo
-	 * or something.  Whatever.  So we only strip things that look like <sup> tags wrapping a
-	 * reference.  And since the data looks like:
+	 * we don't remove the whole <sup> tag.
+	 *
+	 * We also don't want to strip the <sup> tag and remove everything that looks like [2] because,
+	 * I don't know, maybe there is a band named Word [2] Foo or something. Whatever.
+	 *
+	 * So we only strip things that look like <sup> tags wrapping a reference. And since the data
+	 * looks like:
 	 *      Reference in heading <sup>&#91;1&#93;</sup><sup>&#91;2&#93;</sup>
 	 * we can not really use HtmlFormatter as we have no suitable selector.
 	 */
@@ -123,14 +116,16 @@ class WikiTextStructure {
 	 */
 	public static function parseSettingsInMessage( $message ) {
 		$lines = explode( "\n", $message );
-		$lines = preg_replace( '/#.*$/', '', $lines ); // Remove comments
-		$lines = array_map( 'trim', $lines );          // Remove extra spaces
-		$lines = array_filter( $lines );               // Remove empty lines
-		return $lines;
+		// Remove comments
+		$lines = preg_replace( '/#.*$/', '', $lines );
+		// Remove extra spaces
+		$lines = array_map( 'trim', $lines );
+		// Remove empty lines
+		return array_filter( $lines );
 	}
 
 	/**
-	 * Get list of heading to ignore.
+	 * Gets a list of headings to ignore.
 	 * @return string[]
 	 */
 	private function getIgnoredHeadings() {
@@ -139,12 +134,13 @@ class WikiTextStructure {
 			$ignoredHeadings = [];
 			$source = wfMessage( 'search-ignored-headings' )->inContentLanguage();
 			if ( $source->isBlank() ) {
-				// Try old version too, just in case
+				// Try the old version too, just in case
 				$source = wfMessage( 'cirrussearch-ignored-headings' )->inContentLanguage();
 			}
 			if ( !$source->isDisabled() ) {
 				$lines = self::parseSettingsInMessage( $source->plain() );
-				$ignoredHeadings = $lines;               // Now we just have headings!
+				// Now we just have headings!
+				$ignoredHeadings = $lines;
 			}
 		}
 		return $ignoredHeadings;
@@ -172,13 +168,13 @@ class WikiTextStructure {
 		$formatter = new HtmlFormatter( $text );
 
 		// Strip elements from the page that we never want in the search text.
-		$formatter->remove( $this->excludedElementSelectors );
+		$formatter->remove( self::EXCLUDED_ELEMENT_SELECTORS );
 		$formatter->filterContent();
 
 		// Strip elements from the page that are auxiliary text.  These will still be
-		// searched but matches will be ranked lower and non-auxiliary matches will be
+		// searched, but matches will be ranked lower and non-auxiliary matches will be
 		// preferred in highlighting.
-		$formatter->remove( $this->auxiliaryElementSelectors );
+		$formatter->remove( self::AUXILIARY_ELEMENT_SELECTORS );
 		$auxiliaryElements = $formatter->filterContent();
 		$this->allText = trim( Sanitizer::stripAllTags( $formatter->getText() ) );
 		foreach ( $auxiliaryElements as $auxiliaryElement ) {
@@ -195,25 +191,25 @@ class WikiTextStructure {
 	private function extractTextBeforeFirstHeading( $text ) {
 		$matches = [];
 		if ( !preg_match( '/<h[123456]>/', $text, $matches, PREG_OFFSET_CAPTURE ) ) {
-			// There isn't a first heading so we interpret this as the article
+			// There isn't a first heading, so we interpret this as the article
 			// being entirely without heading.
 			return null;
 		}
 		$text = substr( $text, 0, $matches[ 0 ][ 1 ] );
 		if ( !$text ) {
-			// There isn't any text before the first heading so we declare there isn't
+			// There isn't any text before the first heading, so we declare there isn't
 			// a first heading.
 			return null;
 		}
 
 		$formatter = new HtmlFormatter( $text );
-		$formatter->remove( $this->excludedElementSelectors );
-		$formatter->remove( $this->auxiliaryElementSelectors );
+		$formatter->remove( self::EXCLUDED_ELEMENT_SELECTORS );
+		$formatter->remove( self::AUXILIARY_ELEMENT_SELECTORS );
 		$formatter->filterContent();
 		$text = trim( Sanitizer::stripAllTags( $formatter->getText() ) );
 
 		if ( !$text ) {
-			// There isn't any text after filtering before the first heading so we declare
+			// There isn't any text after filtering before the first heading, so we declare
 			// that there isn't a first heading.
 			return null;
 		}
@@ -222,7 +218,7 @@ class WikiTextStructure {
 	}
 
 	/**
-	 * @return string
+	 * @return string|null
 	 */
 	public function getOpeningText() {
 		$this->extractWikitextParts();
@@ -246,7 +242,7 @@ class WikiTextStructure {
 	}
 
 	/**
-	 * Get the defaultsort property
+	 * Get the "defaultsort" property
 	 * @return string|null
 	 */
 	public function getDefaultSort() {