parser: Rename/reorder/document some variables in finalizeHeadings()

We've had too many different things all named 'headline'. This commit should have no functional changes.

Change-Id: I417370ab6b1b67c80e013e8c6fd70c600c18245e
author: Bartosz Dziewoński <dziewonski@fastmail.fm> 2024-12-16 22:49:46 +0100
committer: Bartosz Dziewoński <dziewonski@fastmail.fm> 2024-12-16 22:55:27 +0100
commit: 656db90eaefc2c820d58953985dae1dc608bd469 (patch)
tree: 791ad6aa74daf5ecda27c73d42e5a856ae76a16e /includes/parser
parent: 10feb25df71bd732627304208383abb5aa80f476 (diff)
download: mediawikicore-656db90eaefc2c820d58953985dae1dc608bd469.tar.gz mediawikicore-656db90eaefc2c820d58953985dae1dc608bd469.zip
1 files changed, 22 insertions, 29 deletions
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 72dd2a7a1319..a5e15e7f92ae 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -4332,6 +4332,7 @@ class Parser {
 		$maxTocLevel = $this->svcOptions->get( MainConfigNames::MaxTocLevel );
 		$domDocument = DOMUtils::parseHTML( '' );
 		foreach ( $headlines as $headline ) {
+			// $headline is half-parsed HTML
 			$isTemplate = false;
 			$titleText = false;
 			$sectionIndex = false;
@@ -4358,26 +4359,24 @@ class Parser {
 				$haveTocEntries = true;
 			}
 
-			# The safe header is a version of the header text safe to use for links
-
 			# Remove link placeholders by the link text.
 			#     <!--LINK number-->
 			# turns into
 			#     link text with suffix
 			# Do this before unstrip since link text can contain strip markers
-			$safeHeadline = $this->replaceLinkHoldersText( $headline );
+			$fullyParsedHeadline = $this->replaceLinkHoldersText( $headline );
 
 			# Avoid insertion of weird stuff like <math> by expanding the relevant sections
-			$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
+			$fullyParsedHeadline = $this->mStripState->unstripBoth( $fullyParsedHeadline );
 
 			// Run Tidy to convert wikitext entities to HTML entities (T355386),
 			// conveniently also giving us a way to handle French spaces (T324763)
-			$safeHeadline = $this->tidy->tidy( $safeHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );
+			$fullyParsedHeadline = $this->tidy->tidy( $fullyParsedHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );
 
 			// Wrap the safe headline to parse the heading attributes
 			// Literal HTML tags should be sanitized at this point
 			// cleanUpTocLine will strip the headline tag
-			$wrappedHeadline = "<h$level" . $matches['attrib'][$headlineCount] . $safeHeadline . "</h$level>";
+			$wrappedHeadline = "<h$level" . $matches['attrib'][$headlineCount] . $fullyParsedHeadline . "</h$level>";
 
 			// Parse the heading contents as HTML. This makes it easier to strip out some HTML tags,
 			// and ensures that we generate balanced HTML at the end (T218330).
@@ -4392,40 +4391,34 @@ class Parser {
 			$this->cleanUpTocLine( $headlineDom );
 
 			// Serialize back to HTML
+			// $tocline is for the TOC display, fully-parsed HTML with some tags removed
 			$tocline = trim( DOMUtils::getFragmentInnerHTML( $headlineDom ) );
 
-			# For the anchor, strip out HTML-y stuff period
-			$safeHeadline = trim( $headlineDom->textContent );
-
-			# Save headline for section edit hint before it's normalized for the link
-			$headlineHint = htmlspecialchars( $safeHeadline );
-
-			$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
-			$safeHeadline = self::normalizeSectionName( $safeHeadline );
+			// $headlineText is for the "Edit section: $1" tooltip, plain text
+			$headlineText = trim( $headlineDom->textContent );
 
-			if ( $headingId !== null && $headingId !== '' ) {
-				$safeHeadline = $headingId;
+			if ( $headingId === null || $headingId === '' ) {
+				$headingId = Sanitizer::normalizeSectionNameWhitespace( $headlineText );
+				$headingId = self::normalizeSectionName( $headingId );
 			}
 
-			$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
-			$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
-			$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
-			if ( $fallbackHeadline === $safeHeadline ) {
+			# Create the anchor for linking from the TOC to the section
+			$fallbackAnchor = Sanitizer::escapeIdForAttribute( $headingId, Sanitizer::ID_FALLBACK );
+			$linkAnchor = Sanitizer::escapeIdForLink( $headingId );
+			$anchor = Sanitizer::escapeIdForAttribute( $headingId, Sanitizer::ID_PRIMARY );
+			if ( $fallbackAnchor === $anchor ) {
 				# No reason to have both (in fact, we can't)
-				$fallbackHeadline = false;
+				$fallbackAnchor = false;
 			}
 
 			# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
-			$arrayKey = strtolower( $safeHeadline );
-			if ( $fallbackHeadline === false ) {
+			$arrayKey = strtolower( $anchor );
+			if ( $fallbackAnchor === false ) {
 				$fallbackArrayKey = false;
 			} else {
-				$fallbackArrayKey = strtolower( $fallbackHeadline );
+				$fallbackArrayKey = strtolower( $fallbackAnchor );
 			}
 
-			# Create the anchor for linking from the TOC to the section
-			$anchor = $safeHeadline;
-			$fallbackAnchor = $fallbackHeadline;
 			if ( isset( $refers[$arrayKey] ) ) {
 				for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
 				$anchor .= "_$i";
@@ -4434,7 +4427,7 @@ class Parser {
 			} else {
 				$refers[$arrayKey] = true;
 			}
-			if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
+			if ( $fallbackAnchor !== false && isset( $refers[$fallbackArrayKey] ) ) {
 				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
 				$fallbackAnchor .= "_$i";
 				$refers["{$fallbackArrayKey}_$i"] = true;
@@ -4486,7 +4479,7 @@ class Parser {
 				// because it is supposed to be able to convert that.
 				$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage, ENT_COMPAT );
 				$editlink .= '" section="' . htmlspecialchars( $editsectionSection, ENT_COMPAT ) . '"';
-				$editlink .= '>' . $headlineHint . '</mw:editsection>';
+				$editlink .= '>' . htmlspecialchars( $headlineText ) . '</mw:editsection>';
 			} else {
 				$editlink = '';
 			}
author	Bartosz Dziewoński <dziewonski@fastmail.fm>	2024-12-16 22:49:46 +0100
committer	Bartosz Dziewoński <dziewonski@fastmail.fm>	2024-12-16 22:55:27 +0100
commit	656db90eaefc2c820d58953985dae1dc608bd469 (patch)
tree	791ad6aa74daf5ecda27c73d42e5a856ae76a16e /includes/parser
parent	10feb25df71bd732627304208383abb5aa80f476 (diff)
download	mediawikicore-656db90eaefc2c820d58953985dae1dc608bd469.tar.gz mediawikicore-656db90eaefc2c820d58953985dae1dc608bd469.zip