aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArlo Breault <abreault@wikimedia.org>2021-02-10 10:42:26 -0500
committerArlo Breault <abreault@wikimedia.org>2021-02-16 19:26:29 -0500
commitc44a3958a35eb121499e12ec56a08d3a2f83c9c0 (patch)
tree332c865b622ee924f400685405f04042d42d29eb
parent5d661871223dd132f52b5ac071541b4ceae3bac6 (diff)
downloadmediawikicore-c44a3958a35eb121499e12ec56a08d3a2f83c9c0.tar.gz
mediawikicore-c44a3958a35eb121499e12ec56a08d3a2f83c9c0.zip
Don't apply French spacing in raw text elements
This also means we don't need to take special care for French spacing in attributes, since it's no longer applied there. Adds a test that captures this change. Note that the test "Nowiki and french spacing" wonders whether this escaping should be applied to nowiki content. Bug: T255007 Change-Id: Ic8965e81882d7cf024bdced437f684064a30ac86
-rw-r--r--includes/parser/Parser.php16
-rw-r--r--includes/parser/Sanitizer.php3
-rw-r--r--includes/tidy/RemexCompatFormatter.php16
-rw-r--r--includes/tidy/RemexDriver.php5
-rw-r--r--tests/parser/parserTests.txt23
-rw-r--r--tests/phpunit/includes/parser/ParserTest.php6
6 files changed, 54 insertions, 15 deletions
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 40463d86b119..3562410d6200 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -36,6 +36,7 @@ use MediaWiki\Revision\RevisionAccessException;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\SlotRecord;
use MediaWiki\SpecialPage\SpecialPageFactory;
+use MediaWiki\Tidy\RemexDriver;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use Wikimedia\IPUtils;
@@ -354,6 +355,9 @@ class Parser {
/** @var HookRunner */
private $hookRunner;
+ /** @var RemexDriver */
+ private $remexDriver;
+
/**
* @internal For use by ServiceWiring
*/
@@ -378,7 +382,8 @@ class Parser {
'StylePath',
'TranscludeCacheExpiry',
'PreprocessorCacheThreshold',
- 'DisableLangConversion'
+ 'DisableLangConversion',
+ 'TidyConfig',
];
/**
@@ -468,6 +473,10 @@ class Parser {
MediaWikiServices::getInstance()->getHookContainer();
$this->hookRunner = new HookRunner( $this->hookContainer );
+ $this->remexDriver = new RemexDriver(
+ $this->svcOptions->get( 'TidyConfig' ) ?? []
+ );
+
// T250444: This will eventually be inlined here and the
// standalone method removed.
$this->firstCallInit();
@@ -1673,12 +1682,9 @@ class Parser {
$text = $this->mStripState->unstripGeneral( $text );
- # Clean up special characters, only run once, after doBlockLevels
- $text = Sanitizer::armorFrenchSpaces( $text );
-
$text = Sanitizer::normalizeCharReferences( $text );
- $text = MWTidy::tidy( $text );
+ $text = $this->remexDriver->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );
if ( $isMain ) {
$this->hookRunner->onParserAfterTidy( $this, $text );
diff --git a/includes/parser/Sanitizer.php b/includes/parser/Sanitizer.php
index e9f9ee2a4af0..99a074877201 100644
--- a/includes/parser/Sanitizer.php
+++ b/includes/parser/Sanitizer.php
@@ -784,9 +784,6 @@ class Sanitizer {
'__' => '&#95;_',
] );
- # Armor against French spaces detection (T5158)
- $encValue = self::armorFrenchSpaces( $encValue, '&#32;' );
-
# Stupid hack
$encValue = preg_replace_callback(
'/((?i)' . wfUrlProtocols() . ')/',
diff --git a/includes/tidy/RemexCompatFormatter.php b/includes/tidy/RemexCompatFormatter.php
index abc98baab4b2..4a199fef8d3e 100644
--- a/includes/tidy/RemexCompatFormatter.php
+++ b/includes/tidy/RemexCompatFormatter.php
@@ -16,18 +16,34 @@ class RemexCompatFormatter extends HtmlFormatter {
'tr' => true,
];
+ /** @var ?callable */
+ private $textProcessor;
+
public function __construct( $options = [] ) { // $options goes to the parent; 'textProcessor' is also read below
parent::__construct( $options );
$this->attributeEscapes["\u{00A0}"] = '&#160;'; // add a U+00A0 => '&#160;' entry to the attributeEscapes array
unset( $this->attributeEscapes["&"] ); // remove the "&" entry (presumably so "&" passes through as-is; confirm)
$this->textEscapes["\u{00A0}"] = '&#160;'; // same U+00A0 entry for the textEscapes array
unset( $this->textEscapes["&"] ); // and remove its "&" entry as well
+ $this->textProcessor = $options['textProcessor'] ?? null; // optional callable; null when the key is missing or null
}
public function startDocument( $fragmentNamespace, $fragmentName ) { // neither argument is used
return ''; // always returns an empty string
}
+ public function characters( SerializerNode $parent, $text, $start, $length ) { // parent's output, optionally post-processed
+ $text = parent::characters( $parent, $text, $start, $length ); // the processor below sees this result, not the input
+ if ( $parent->namespace !== HTMLData::NS_HTML // parent element is outside the HTML namespace...
+ || !isset( $this->rawTextElements[$parent->name] ) // ...or its name has no entry in rawTextElements
+ ) {
+ if ( $this->textProcessor !== null ) { // null means no processor was supplied to the constructor
+ $text = call_user_func( $this->textProcessor, $text ); // called with the text only; its return value replaces it
+ }
+ }
+ return $text; // for HTML elements listed in rawTextElements this is the parent's output unchanged
+ }
+
public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
$data = $node->snData;
if ( $data && $data->isPWrapper ) {
diff --git a/includes/tidy/RemexDriver.php b/includes/tidy/RemexDriver.php
index 75031adbe947..96a2dbf268ae 100644
--- a/includes/tidy/RemexDriver.php
+++ b/includes/tidy/RemexDriver.php
@@ -29,12 +29,11 @@ class RemexDriver extends TidyDriverBase {
parent::__construct( $config );
}
- public function tidy( $text ) {
+ public function tidy( $text, callable $textProcessor = null ) {
$traceCallback = static function ( $msg ) {
wfDebug( "RemexHtml: $msg" );
};
-
- $formatter = new RemexCompatFormatter;
+ $formatter = new RemexCompatFormatter( [ 'textProcessor' => $textProcessor ] );
if ( $this->serializerTrace ) {
$serializer = new SerializerWithTracer( $formatter, null, $traceCallback );
} else {
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 79a1725b6e51..d6828fd45c34 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -1754,6 +1754,23 @@ Nowiki and french spacing
<p><span typeof="mw:Nowiki">test<span typeof="mw:DisplaySpace"> </span>: 123</span></p>
!! end
+!! test
+T255007: French spacing in raw text elements
+!! options
+wgRawHtml=1
+!! wikitext
+<html>
+<script>test ; 123</script>
+<style>test : 123</style>
+</html>
+!! html/php
+<p>
+<script>test ; 123</script>
+<style>test : 123</style>
+
+</p>
+!! end
+
###
### Comments
###
@@ -18253,7 +18270,7 @@ Punctuation: CSS ! important (T13874; with space after)
!! wikitext
<div style="width:50% ! important">important</div>
!! html
-<div style="width:50%&#32;! important">important</div>
+<div style="width:50% ! important">important</div>
!! end
!! test
@@ -22820,7 +22837,7 @@ Play a bit with r67090 and T5158
<div style="width:50% !important">&#160;</div>
<div style="width:50%&#160;!important">&#160;</div>
<div style="width:50%&#160;!important">&#160;</div>
-<div style="border&#32;: solid;">&#160;</div>
+<div style="border : solid;">&#160;</div>
!! html/parsoid
<div style="width:50% !important" data-parsoid='{"stx":"html"}'><span typeof="mw:Entity" data-parsoid='{"srcContent":" "}'> </span></div>
<div style="width:50% !important" data-parsoid='{"stx":"html","a":{"style":"width:50% !important"},"sa":{"style":"width:50%&amp;nbsp;!important"}}'><span typeof="mw:Entity" data-parsoid='{"srcContent":" "}'> </span></div>
@@ -22860,7 +22877,7 @@ T5158: Test for French spaces in attributes
!! wikitext
<br style=" clear : both ; " />
!! html/php
-<p><br style="clear&#32;: both&#32;;" />
+<p><br style="clear : both ;" />
</p>
!! end
diff --git a/tests/phpunit/includes/parser/ParserTest.php b/tests/phpunit/includes/parser/ParserTest.php
index 0255c0342c8e..6853db6d1cc6 100644
--- a/tests/phpunit/includes/parser/ParserTest.php
+++ b/tests/phpunit/includes/parser/ParserTest.php
@@ -8,7 +8,11 @@ class ParserTest extends MediaWikiIntegrationTestCase {
// Create a mock Config object that will satisfy ServiceOptions::__construct
$mockConfig = $this->createMock( Config::class );
$mockConfig->method( 'has' )->willReturn( true );
- $mockConfig->method( 'get' )->willReturn( 'I like otters.' );
+ $mockConfig->method( 'get' )->will(
+ $this->returnCallback( function ( $arg ) {
+ return ( $arg === 'TidyConfig' ) ? null : 'I like otters.';
+ } )
+ );
// Stub out a MagicWordFactory so the Parser can initialize its
// function hooks when it is created.