diff options
author | Bartosz Dziewoński <dziewonski@fastmail.fm> | 2024-12-16 22:49:46 +0100 |
---|---|---|
committer | Bartosz Dziewoński <dziewonski@fastmail.fm> | 2024-12-16 22:55:27 +0100 |
commit | 656db90eaefc2c820d58953985dae1dc608bd469 (patch) | |
tree | 791ad6aa74daf5ecda27c73d42e5a856ae76a16e /includes/parser | |
parent | 10feb25df71bd732627304208383abb5aa80f476 (diff) | |
download | mediawikicore-656db90eaefc2c820d58953985dae1dc608bd469.tar.gz mediawikicore-656db90eaefc2c820d58953985dae1dc608bd469.zip |
parser: Rename/reorder/document some variables in finalizeHeadings()
We've had too many different things all named 'headline'.
This commit should have no functional changes.
Change-Id: I417370ab6b1b67c80e013e8c6fd70c600c18245e
Diffstat (limited to 'includes/parser')
-rw-r--r-- | includes/parser/Parser.php | 51 |
1 file changed, 22 insertions, 29 deletions
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 72dd2a7a1319..a5e15e7f92ae 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -4332,6 +4332,7 @@ class Parser { $maxTocLevel = $this->svcOptions->get( MainConfigNames::MaxTocLevel ); $domDocument = DOMUtils::parseHTML( '' ); foreach ( $headlines as $headline ) { + // $headline is half-parsed HTML $isTemplate = false; $titleText = false; $sectionIndex = false; @@ -4358,26 +4359,24 @@ class Parser { $haveTocEntries = true; } - # The safe header is a version of the header text safe to use for links - # Remove link placeholders by the link text. # <!--LINK number--> # turns into # link text with suffix # Do this before unstrip since link text can contain strip markers - $safeHeadline = $this->replaceLinkHoldersText( $headline ); + $fullyParsedHeadline = $this->replaceLinkHoldersText( $headline ); # Avoid insertion of weird stuff like <math> by expanding the relevant sections - $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline ); + $fullyParsedHeadline = $this->mStripState->unstripBoth( $fullyParsedHeadline ); // Run Tidy to convert wikitext entities to HTML entities (T355386), // conveniently also giving us a way to handle French spaces (T324763) - $safeHeadline = $this->tidy->tidy( $safeHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] ); + $fullyParsedHeadline = $this->tidy->tidy( $fullyParsedHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] ); // Wrap the safe headline to parse the heading attributes // Literal HTML tags should be sanitized at this point // cleanUpTocLine will strip the headline tag - $wrappedHeadline = "<h$level" . $matches['attrib'][$headlineCount] . $safeHeadline . "</h$level>"; + $wrappedHeadline = "<h$level" . $matches['attrib'][$headlineCount] . $fullyParsedHeadline . "</h$level>"; // Parse the heading contents as HTML. 
This makes it easier to strip out some HTML tags, // and ensures that we generate balanced HTML at the end (T218330). @@ -4392,40 +4391,34 @@ class Parser { $this->cleanUpTocLine( $headlineDom ); // Serialize back to HTML + // $tocline is for the TOC display, fully-parsed HTML with some tags removed $tocline = trim( DOMUtils::getFragmentInnerHTML( $headlineDom ) ); - # For the anchor, strip out HTML-y stuff period - $safeHeadline = trim( $headlineDom->textContent ); - - # Save headline for section edit hint before it's normalized for the link - $headlineHint = htmlspecialchars( $safeHeadline ); - - $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline ); - $safeHeadline = self::normalizeSectionName( $safeHeadline ); + // $headlineText is for the "Edit section: $1" tooltip, plain text + $headlineText = trim( $headlineDom->textContent ); - if ( $headingId !== null && $headingId !== '' ) { - $safeHeadline = $headingId; + if ( $headingId === null || $headingId === '' ) { + $headingId = Sanitizer::normalizeSectionNameWhitespace( $headlineText ); + $headingId = self::normalizeSectionName( $headingId ); } - $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK ); - $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline ); - $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY ); - if ( $fallbackHeadline === $safeHeadline ) { + # Create the anchor for linking from the TOC to the section + $fallbackAnchor = Sanitizer::escapeIdForAttribute( $headingId, Sanitizer::ID_FALLBACK ); + $linkAnchor = Sanitizer::escapeIdForLink( $headingId ); + $anchor = Sanitizer::escapeIdForAttribute( $headingId, Sanitizer::ID_PRIMARY ); + if ( $fallbackAnchor === $anchor ) { # No reason to have both (in fact, we can't) - $fallbackHeadline = false; + $fallbackAnchor = false; } # HTML IDs must be case-insensitively unique for IE compatibility (T12721). 
- $arrayKey = strtolower( $safeHeadline ); - if ( $fallbackHeadline === false ) { + $arrayKey = strtolower( $anchor ); + if ( $fallbackAnchor === false ) { $fallbackArrayKey = false; } else { - $fallbackArrayKey = strtolower( $fallbackHeadline ); + $fallbackArrayKey = strtolower( $fallbackAnchor ); } - # Create the anchor for linking from the TOC to the section - $anchor = $safeHeadline; - $fallbackAnchor = $fallbackHeadline; if ( isset( $refers[$arrayKey] ) ) { for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i ); $anchor .= "_$i"; @@ -4434,7 +4427,7 @@ class Parser { } else { $refers[$arrayKey] = true; } - if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) { + if ( $fallbackAnchor !== false && isset( $refers[$fallbackArrayKey] ) ) { for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i ); $fallbackAnchor .= "_$i"; $refers["{$fallbackArrayKey}_$i"] = true; @@ -4486,7 +4479,7 @@ class Parser { // because it is supposed to be able to convert that. $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage, ENT_COMPAT ); $editlink .= '" section="' . htmlspecialchars( $editsectionSection, ENT_COMPAT ) . '"'; - $editlink .= '>' . $headlineHint . '</mw:editsection>'; + $editlink .= '>' . htmlspecialchars( $headlineText ) . '</mw:editsection>'; } else { $editlink = ''; } |