about | summary | refs | log | tree | commit | diff | stats
path: root/includes/parser
diff options
context:
space:
mode:
author: Bartosz Dziewoński <dziewonski@fastmail.fm>, 2024-12-16 22:49:46 +0100
committer: Bartosz Dziewoński <dziewonski@fastmail.fm>, 2024-12-16 22:55:27 +0100
commit: 656db90eaefc2c820d58953985dae1dc608bd469 (patch)
tree: 791ad6aa74daf5ecda27c73d42e5a856ae76a16e /includes/parser
parent: 10feb25df71bd732627304208383abb5aa80f476 (diff)
download: mediawikicore-656db90eaefc2c820d58953985dae1dc608bd469.tar.gz
download: mediawikicore-656db90eaefc2c820d58953985dae1dc608bd469.zip
parser: Rename/reorder/document some variables in finalizeHeadings()
We've had too many different things all named 'headline'. This commit should have no functional changes.

Change-Id: I417370ab6b1b67c80e013e8c6fd70c600c18245e
Diffstat (limited to 'includes/parser')
-rw-r--r-- includes/parser/Parser.php | 51
1 file changed, 22 insertions, 29 deletions
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 72dd2a7a1319..a5e15e7f92ae 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -4332,6 +4332,7 @@ class Parser {
$maxTocLevel = $this->svcOptions->get( MainConfigNames::MaxTocLevel );
$domDocument = DOMUtils::parseHTML( '' );
foreach ( $headlines as $headline ) {
+ // $headline is half-parsed HTML
$isTemplate = false;
$titleText = false;
$sectionIndex = false;
@@ -4358,26 +4359,24 @@ class Parser {
$haveTocEntries = true;
}
- # The safe header is a version of the header text safe to use for links
-
# Remove link placeholders by the link text.
# <!--LINK number-->
# turns into
# link text with suffix
# Do this before unstrip since link text can contain strip markers
- $safeHeadline = $this->replaceLinkHoldersText( $headline );
+ $fullyParsedHeadline = $this->replaceLinkHoldersText( $headline );
# Avoid insertion of weird stuff like <math> by expanding the relevant sections
- $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
+ $fullyParsedHeadline = $this->mStripState->unstripBoth( $fullyParsedHeadline );
// Run Tidy to convert wikitext entities to HTML entities (T355386),
// conveniently also giving us a way to handle French spaces (T324763)
- $safeHeadline = $this->tidy->tidy( $safeHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );
+ $fullyParsedHeadline = $this->tidy->tidy( $fullyParsedHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );
// Wrap the safe headline to parse the heading attributes
// Literal HTML tags should be sanitized at this point
// cleanUpTocLine will strip the headline tag
- $wrappedHeadline = "<h$level" . $matches['attrib'][$headlineCount] . $safeHeadline . "</h$level>";
+ $wrappedHeadline = "<h$level" . $matches['attrib'][$headlineCount] . $fullyParsedHeadline . "</h$level>";
// Parse the heading contents as HTML. This makes it easier to strip out some HTML tags,
// and ensures that we generate balanced HTML at the end (T218330).
@@ -4392,40 +4391,34 @@ class Parser {
$this->cleanUpTocLine( $headlineDom );
// Serialize back to HTML
+ // $tocline is for the TOC display, fully-parsed HTML with some tags removed
$tocline = trim( DOMUtils::getFragmentInnerHTML( $headlineDom ) );
- # For the anchor, strip out HTML-y stuff period
- $safeHeadline = trim( $headlineDom->textContent );
-
- # Save headline for section edit hint before it's normalized for the link
- $headlineHint = htmlspecialchars( $safeHeadline );
-
- $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
- $safeHeadline = self::normalizeSectionName( $safeHeadline );
+ // $headlineText is for the "Edit section: $1" tooltip, plain text
+ $headlineText = trim( $headlineDom->textContent );
- if ( $headingId !== null && $headingId !== '' ) {
- $safeHeadline = $headingId;
+ if ( $headingId === null || $headingId === '' ) {
+ $headingId = Sanitizer::normalizeSectionNameWhitespace( $headlineText );
+ $headingId = self::normalizeSectionName( $headingId );
}
- $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
- $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
- $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
- if ( $fallbackHeadline === $safeHeadline ) {
+ # Create the anchor for linking from the TOC to the section
+ $fallbackAnchor = Sanitizer::escapeIdForAttribute( $headingId, Sanitizer::ID_FALLBACK );
+ $linkAnchor = Sanitizer::escapeIdForLink( $headingId );
+ $anchor = Sanitizer::escapeIdForAttribute( $headingId, Sanitizer::ID_PRIMARY );
+ if ( $fallbackAnchor === $anchor ) {
# No reason to have both (in fact, we can't)
- $fallbackHeadline = false;
+ $fallbackAnchor = false;
}
# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
- $arrayKey = strtolower( $safeHeadline );
- if ( $fallbackHeadline === false ) {
+ $arrayKey = strtolower( $anchor );
+ if ( $fallbackAnchor === false ) {
$fallbackArrayKey = false;
} else {
- $fallbackArrayKey = strtolower( $fallbackHeadline );
+ $fallbackArrayKey = strtolower( $fallbackAnchor );
}
- # Create the anchor for linking from the TOC to the section
- $anchor = $safeHeadline;
- $fallbackAnchor = $fallbackHeadline;
if ( isset( $refers[$arrayKey] ) ) {
for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
$anchor .= "_$i";
@@ -4434,7 +4427,7 @@ class Parser {
} else {
$refers[$arrayKey] = true;
}
- if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
+ if ( $fallbackAnchor !== false && isset( $refers[$fallbackArrayKey] ) ) {
for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
$fallbackAnchor .= "_$i";
$refers["{$fallbackArrayKey}_$i"] = true;
@@ -4486,7 +4479,7 @@ class Parser {
// because it is supposed to be able to convert that.
$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage, ENT_COMPAT );
$editlink .= '" section="' . htmlspecialchars( $editsectionSection, ENT_COMPAT ) . '"';
- $editlink .= '>' . $headlineHint . '</mw:editsection>';
+ $editlink .= '>' . htmlspecialchars( $headlineText ) . '</mw:editsection>';
} else {
$editlink = '';
}