aboutsummaryrefslogtreecommitdiffstats
path: root/includes/parser
diff options
context:
space:
mode:
authorC. Scott Ananian <cscott@cscott.net>2025-01-28 11:59:46 -0500
committerC. Scott Ananian <cscott@cscott.net>2025-02-02 12:47:34 -0500
commit7b1c8c45600596944bea6b2e200f3fae0a7ea3f0 (patch)
tree0beec1fb26a042b89944aa7f6ebd0462984a4a7a /includes/parser
parent08c51fb766af7ff6dfc705fe98745cb6e2c56820 (diff)
downloadmediawikicore-7b1c8c45600596944bea6b2e200f3fae0a7ea3f0.tar.gz
mediawikicore-7b1c8c45600596944bea6b2e200f3fae0a7ea3f0.zip
Add 'isRawHTML' output mode for parser functions and extensions
Ensure that when a parser function or extension returns raw HTML (using the new 'isRawHTML' flag), it is protected from doBlockLevels, language conversion, etc., by using a 'nowiki' strip marker. Bug: T381617 Depends-On: I8f43f6ae9ca9a0c8d88c92b65c81fdc5cfa09dc3 Change-Id: Icb8eae9c1f3146e19c6bd811ab1fc86eebaa991f
Diffstat (limited to 'includes/parser')
-rw-r--r--includes/parser/Parser.php30
1 files changed, 27 insertions, 3 deletions
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index fb6af228b827..b6ef7ebcf4ff 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -3022,8 +3022,12 @@ class Parser {
$text = '';
// wiki markup in $text should be escaped
$nowiki = false;
- // $text is HTML, armour it against wikitext transformation
+ // $text is HTML, armour it against most wikitext transformation
+ // (it still participates in doBlockLevels, language conversion,
+ // and the other steps at the start of ::internalParseHalfParsed)
$isHTML = false;
+ // $text is raw HTML, armour it against all wikitext transformation
+ $isRawHTML = false;
// Force interwiki transclusion to be done in raw mode not rendered
$forceRawInterwiki = false;
// $text is a DOM node needing expansion in a child frame
@@ -3146,6 +3150,9 @@ class Parser {
if ( isset( $result['isHTML'] ) ) {
$isHTML = $result['isHTML'];
}
+ if ( isset( $result['isRawHTML'] ) ) {
+ $isRawHTML = $result['isRawHTML'];
+ }
if ( isset( $result['forceRawInterwiki'] ) ) {
$forceRawInterwiki = $result['forceRawInterwiki'];
}
@@ -3348,6 +3355,14 @@ class Parser {
if ( $isHTML ) {
// @phan-suppress-next-line SecurityCheck-XSS
$text = $this->insertStripItem( $text );
+ } elseif ( $isRawHTML ) {
+ $marker = self::MARKER_PREFIX . "-pf-"
+ . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;
+ // use 'nowiki' type to protect this from doBlockLevels,
+ // language conversion, etc.
+ // @phan-suppress-next-line SecurityCheck-XSS
+ $this->mStripState->addNoWiki( $marker, $text );
+ $text = $marker;
} elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
# Escape nowiki-style return values
// @phan-suppress-next-line SecurityCheck-DoubleEscaped
@@ -3397,7 +3412,8 @@ class Parser {
* whether the parser function was found or not. It may also contain the
* following:
* text: string|object, resulting wikitext or PP DOM object
- * isHTML: bool, $text is HTML, armour it against wikitext transformation
+ * isHTML: bool, $text is HTML, armour it against most wikitext transformation
+ * isRawHTML: bool, $text is raw HTML, armour it against all wikitext transformation
* isChildObj: bool, $text is a DOM node needing expansion in a child frame
* isLocalObj: bool, $text is a DOM node needing expansion in the current frame
* nowiki: bool, wiki markup in $text should be escaped
@@ -4060,6 +4076,9 @@ class Parser {
// Extract flags
$flags = $output;
$output = $flags[0];
+ if ( isset( $flags['isRawHTML'] ) ) {
+ $markerType = 'nowiki';
+ }
if ( isset( $flags['markerType'] ) ) {
$markerType = $flags['markerType'];
}
@@ -5056,7 +5075,12 @@ class Parser {
* found The text returned is valid, stop processing the template. This
* is on by default.
* nowiki Wiki markup in the return value should be escaped
- * isHTML The returned text is HTML, armour it against wikitext transformation
+ * isHTML The returned text is HTML, armour it
+ * against most wikitext transformation, but
+ * perform language conversion and some other
+ * postprocessing
+ * isRawHTML The returned text is raw HTML, include it
+ * verbatim in the output.
*
* @param string $id The magic word ID
* @param callable $callback The callback function (and object) to use