diff options
author | Arlo Breault <abreault@wikimedia.org> | 2021-02-10 10:42:26 -0500 |
---|---|---|
committer | Arlo Breault <abreault@wikimedia.org> | 2021-02-16 19:26:29 -0500 |
commit | c44a3958a35eb121499e12ec56a08d3a2f83c9c0 (patch) | |
tree | 332c865b622ee924f400685405f04042d42d29eb | |
parent | 5d661871223dd132f52b5ac071541b4ceae3bac6 (diff) | |
download | mediawikicore-c44a3958a35eb121499e12ec56a08d3a2f83c9c0.tar.gz mediawikicore-c44a3958a35eb121499e12ec56a08d3a2f83c9c0.zip |
Don't apply French spacing in raw text elements
This also means we don't need to take special care for French spacing in
attributes, since it's no longer applied there.
Adds a test that captures this change.
Note that the existing test "Nowiki and french spacing" raises the question of
whether this escaping should be applied to nowiki content at all.
Bug: T255007
Change-Id: Ic8965e81882d7cf024bdced437f684064a30ac86
-rw-r--r-- | includes/parser/Parser.php | 16 | ||||
-rw-r--r-- | includes/parser/Sanitizer.php | 3 | ||||
-rw-r--r-- | includes/tidy/RemexCompatFormatter.php | 16 | ||||
-rw-r--r-- | includes/tidy/RemexDriver.php | 5 | ||||
-rw-r--r-- | tests/parser/parserTests.txt | 23 | ||||
-rw-r--r-- | tests/phpunit/includes/parser/ParserTest.php | 6 |
6 files changed, 54 insertions, 15 deletions
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 40463d86b119..3562410d6200 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -36,6 +36,7 @@ use MediaWiki\Revision\RevisionAccessException; use MediaWiki\Revision\RevisionRecord; use MediaWiki\Revision\SlotRecord; use MediaWiki\SpecialPage\SpecialPageFactory; +use MediaWiki\Tidy\RemexDriver; use Psr\Log\LoggerInterface; use Psr\Log\NullLogger; use Wikimedia\IPUtils; @@ -354,6 +355,9 @@ class Parser { /** @var HookRunner */ private $hookRunner; + /** @var RemexDriver */ + private $remexDriver; + /** * @internal For use by ServiceWiring */ @@ -378,7 +382,8 @@ class Parser { 'StylePath', 'TranscludeCacheExpiry', 'PreprocessorCacheThreshold', - 'DisableLangConversion' + 'DisableLangConversion', + 'TidyConfig', ]; /** @@ -468,6 +473,10 @@ class Parser { MediaWikiServices::getInstance()->getHookContainer(); $this->hookRunner = new HookRunner( $this->hookContainer ); + $this->remexDriver = new RemexDriver( + $this->svcOptions->get( 'TidyConfig' ) ?? [] + ); + // T250444: This will eventually be inlined here and the // standalone method removed. 
$this->firstCallInit(); @@ -1673,12 +1682,9 @@ class Parser { $text = $this->mStripState->unstripGeneral( $text ); - # Clean up special characters, only run once, after doBlockLevels - $text = Sanitizer::armorFrenchSpaces( $text ); - $text = Sanitizer::normalizeCharReferences( $text ); - $text = MWTidy::tidy( $text ); + $text = $this->remexDriver->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] ); if ( $isMain ) { $this->hookRunner->onParserAfterTidy( $this, $text ); diff --git a/includes/parser/Sanitizer.php b/includes/parser/Sanitizer.php index e9f9ee2a4af0..99a074877201 100644 --- a/includes/parser/Sanitizer.php +++ b/includes/parser/Sanitizer.php @@ -784,9 +784,6 @@ class Sanitizer { '__' => '__', ] ); - # Armor against French spaces detection (T5158) - $encValue = self::armorFrenchSpaces( $encValue, ' ' ); - # Stupid hack $encValue = preg_replace_callback( '/((?i)' . wfUrlProtocols() . ')/', diff --git a/includes/tidy/RemexCompatFormatter.php b/includes/tidy/RemexCompatFormatter.php index abc98baab4b2..4a199fef8d3e 100644 --- a/includes/tidy/RemexCompatFormatter.php +++ b/includes/tidy/RemexCompatFormatter.php @@ -16,18 +16,34 @@ class RemexCompatFormatter extends HtmlFormatter { 'tr' => true, ]; + /* @var ?callable */ + private $textProcessor; + public function __construct( $options = [] ) { parent::__construct( $options ); $this->attributeEscapes["\u{00A0}"] = ' '; unset( $this->attributeEscapes["&"] ); $this->textEscapes["\u{00A0}"] = ' '; unset( $this->textEscapes["&"] ); + $this->textProcessor = $options['textProcessor'] ?? 
null; } public function startDocument( $fragmentNamespace, $fragmentName ) { return ''; } + public function characters( SerializerNode $parent, $text, $start, $length ) { + $text = parent::characters( $parent, $text, $start, $length ); + if ( $parent->namespace !== HTMLData::NS_HTML + || !isset( $this->rawTextElements[$parent->name] ) + ) { + if ( $this->textProcessor !== null ) { + $text = call_user_func( $this->textProcessor, $text ); + } + } + return $text; + } + public function element( SerializerNode $parent, SerializerNode $node, $contents ) { $data = $node->snData; if ( $data && $data->isPWrapper ) { diff --git a/includes/tidy/RemexDriver.php b/includes/tidy/RemexDriver.php index 75031adbe947..96a2dbf268ae 100644 --- a/includes/tidy/RemexDriver.php +++ b/includes/tidy/RemexDriver.php @@ -29,12 +29,11 @@ class RemexDriver extends TidyDriverBase { parent::__construct( $config ); } - public function tidy( $text ) { + public function tidy( $text, callable $textProcessor = null ) { $traceCallback = static function ( $msg ) { wfDebug( "RemexHtml: $msg" ); }; - - $formatter = new RemexCompatFormatter; + $formatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] ); if ( $this->serializerTrace ) { $serializer = new SerializerWithTracer( $formatter, null, $traceCallback ); } else { diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 79a1725b6e51..d6828fd45c34 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -1754,6 +1754,23 @@ Nowiki and french spacing <p><span typeof="mw:Nowiki">test<span typeof="mw:DisplaySpace"> </span>: 123</span></p> !! end +!! test +T255007: French spacing in raw text elements +!! options +wgRawHtml=1 +!! wikitext +<html> +<script>test ; 123</script> +<style>test : 123</style> +</html> +!! html/php +<p> +<script>test ; 123</script> +<style>test : 123</style> + +</p> +!! end + ### ### Comments ### @@ -18253,7 +18270,7 @@ Punctuation: CSS ! 
important (T13874; with space after) !! wikitext <div style="width:50% ! important">important</div> !! html -<div style="width:50% ! important">important</div> +<div style="width:50% ! important">important</div> !! end !! test @@ -22820,7 +22837,7 @@ Play a bit with r67090 and T5158 <div style="width:50% !important"> </div> <div style="width:50% !important"> </div> <div style="width:50% !important"> </div> -<div style="border : solid;"> </div> +<div style="border : solid;"> </div> !! html/parsoid <div style="width:50% !important" data-parsoid='{"stx":"html"}'><span typeof="mw:Entity" data-parsoid='{"srcContent":" "}'> </span></div> <div style="width:50% !important" data-parsoid='{"stx":"html","a":{"style":"width:50% !important"},"sa":{"style":"width:50%&nbsp;!important"}}'><span typeof="mw:Entity" data-parsoid='{"srcContent":" "}'> </span></div> @@ -22860,7 +22877,7 @@ T5158: Test for French spaces in attributes !! wikitext <br style=" clear : both ; " /> !! html/php -<p><br style="clear : both ;" /> +<p><br style="clear : both ;" /> </p> !! end diff --git a/tests/phpunit/includes/parser/ParserTest.php b/tests/phpunit/includes/parser/ParserTest.php index 0255c0342c8e..6853db6d1cc6 100644 --- a/tests/phpunit/includes/parser/ParserTest.php +++ b/tests/phpunit/includes/parser/ParserTest.php @@ -8,7 +8,11 @@ class ParserTest extends MediaWikiIntegrationTestCase { // Create a mock Config object that will satisfy ServiceOptions::__construct $mockConfig = $this->createMock( Config::class ); $mockConfig->method( 'has' )->willReturn( true ); - $mockConfig->method( 'get' )->willReturn( 'I like otters.' ); + $mockConfig->method( 'get' )->will( + $this->returnCallback( function ( $arg ) { + return ( $arg === 'TidyConfig' ) ? null : 'I like otters.'; + } ) + ); // Stub out a MagicWordFactory so the Parser can initialize its // function hooks when it is created. |