diff options
4 files changed, 29 insertions, 24 deletions
diff --git a/includes/OutputTransform/Stages/DeduplicateStyles.php b/includes/OutputTransform/Stages/DeduplicateStyles.php index 692e5933a910..f80e70f1896d 100644 --- a/includes/OutputTransform/Stages/DeduplicateStyles.php +++ b/includes/OutputTransform/Stages/DeduplicateStyles.php @@ -2,11 +2,12 @@ namespace MediaWiki\OutputTransform\Stages; -use MediaWiki\Html\Html; +use MediaWiki\Html\HtmlHelper; use MediaWiki\OutputTransform\ContentTextTransformStage; use MediaWiki\Parser\ParserOptions; use MediaWiki\Parser\ParserOutput; -use MediaWiki\Parser\Sanitizer; +use Wikimedia\RemexHtml\Serializer\SerializerNode; +use Wikimedia\RemexHtml\Tokenizer\PlainAttributes; /** * Generates a list of unique style links @@ -20,28 +21,32 @@ class DeduplicateStyles extends ContentTextTransformStage { protected function transformText( string $text, ParserOutput $po, ?ParserOptions $popts, array &$options ): string { $seen = []; - return preg_replace_callback( '#<style\s+([^>]*data-mw-deduplicate\s*=[\'"][^>]*)>.*?</style>#s', - static function ( $m ) use ( &$seen ) { - $attr = Sanitizer::decodeTagAttributes( $m[1] ); - if ( !isset( $attr['data-mw-deduplicate'] ) ) { - return $m[0]; - } - - $key = $attr['data-mw-deduplicate']; + return HtmlHelper::modifyElements( + $text, + static function ( SerializerNode $node ): bool { + return $node->name === 'style' && + ( $node->attrs['data-mw-deduplicate'] ?? '' ) !== ''; + }, + static function ( SerializerNode $node ) use ( &$seen ): SerializerNode { + $key = $node->attrs['data-mw-deduplicate']; if ( !isset( $seen[$key] ) ) { $seen[$key] = true; - - return $m[0]; + return $node; } - // We were going to use an empty <style> here, but there // was concern that would be too much overhead for browsers. // So let's hope a <link> with a non-standard rel and href isn't // going to be misinterpreted or mangled by any subsequent processing. - return Html::element( 'link', [ + $node->name = 'link'; + $node->attrs = new PlainAttributes( [ 'rel' => 'mw-deduplicated-inline-style', 'href' => "mw-data:" . wfUrlencode( $key ), ] ); - }, $text ); + $node->children = []; + $node->void = true; + return $node; + }, + $options['isParsoidContent'] ?? false + ); } } diff --git a/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php b/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php index 2b10b8bb663d..eae21cbbedcd 100644 --- a/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php +++ b/tests/phpunit/includes/OutputTransform/Stages/DeduplicateStylesTest.php @@ -39,12 +39,12 @@ class DeduplicateStylesTest extends OutputTransformStageTestBase { $dedup = <<<EOF <p>This is a test document.</p> <style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style> -<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1"> +<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" /> <style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style> -<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1"> -<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2"> +<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" /> +<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2" /> <style data-mw-not-deduplicate="duplicate1">.Duplicate1 {}</style> -<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1"> +<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" /> <style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style> <style>.Duplicate1 {}</style> EOF; diff --git a/tests/phpunit/includes/content/WikitextContentHandlerIntegrationTest.php b/tests/phpunit/includes/content/WikitextContentHandlerIntegrationTest.php index e28bb3c45378..86b9872924d0 100644 --- a/tests/phpunit/includes/content/WikitextContentHandlerIntegrationTest.php +++ b/tests/phpunit/includes/content/WikitextContentHandlerIntegrationTest.php @@ -103,7 +103,7 @@ class WikitextContentHandlerIntegrationTest extends TextContentHandlerIntegratio 'title' => 'WikitextContentTest_testGetParserOutput', 'model' => CONTENT_MODEL_WIKITEXT, 'text' => "#REDIRECT [[Main Page]]", - 'expectedHtml' => "<div class=\"mw-content-ltr mw-parser-output\" lang=\"en\" dir=\"ltr\" $parsoidVersion><div class=\"redirectMsg\"><p>Redirect to:</p><ul class=\"redirectText\"><li><a href=\"/w/index.php?title=Main_Page&action=edit&redlink=1\" class=\"new\" title=\"Main Page (page does not exist)\">Main Page</a></li></ul></div><section data-mw-section-id=\"0\" id=\"mwAQ\"><link rel=\"mw:PageProp/redirect\" href=\"./Main_Page\" id=\"mwAg\" /></section></div>", + 'expectedHtml' => "<div class=\"mw-content-ltr mw-parser-output\" lang=\"en\" dir=\"ltr\" $parsoidVersion><div class=\"redirectMsg\"><p>Redirect to:</p><ul class=\"redirectText\"><li><a href=\"/w/index.php?title=Main_Page&action=edit&redlink=1\" class=\"new\" title=\"Main Page (page does not exist)\">Main Page</a></li></ul></div><section data-mw-section-id=\"0\" id=\"mwAQ\"><link rel=\"mw:PageProp/redirect\" href=\"./Main_Page\" id=\"mwAg\"></section></div>", 'expectedFields' => [ 'Links' => [ [ 'Main_Page' => 0 ], diff --git a/tests/phpunit/includes/parser/ParserOutputTest.php b/tests/phpunit/includes/parser/ParserOutputTest.php index 56f6cdcddb35..2b488ffc0a17 100644 --- a/tests/phpunit/includes/parser/ParserOutputTest.php +++ b/tests/phpunit/includes/parser/ParserOutputTest.php @@ -483,12 +483,12 @@ EOF [], $dedupText, <<<EOF <p>This is a test document.</p> <style data-mw-deduplicate="duplicate1">.Duplicate1 {}</style> -<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1"> +<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" /> <style data-mw-deduplicate="duplicate2">.Duplicate2 {}</style> -<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1"> -<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2"> +<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" /> +<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate2" /> <style data-mw-not-deduplicate="duplicate1">.Duplicate1 {}</style> -<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1"> +<link rel="mw-deduplicated-inline-style" href="mw-data:duplicate1" /> <style data-mw-deduplicate="duplicate3">.Duplicate1 {}</style> <style>.Duplicate1 {}</style> EOF |