diff options
author | C. Scott Ananian <cscott@cscott.net> | 2017-01-19 14:58:05 -0500 |
---|---|---|
committer | Reedy <reedy@wikimedia.org> | 2017-05-23 15:43:49 +0100 |
commit | 186a182a150f20475f0887cbc261d9be01dbfd98 (patch) | |
tree | af33749287184af6a8b6d203cf6da804c84b8aaf /includes/parser/Preprocessor_Hash.php | |
parent | 1928a85867dcbdb035d2cb2432efb91353826fba (diff) | |
download | mediawikicore-186a182a150f20475f0887cbc261d9be01dbfd98.tar.gz mediawikicore-186a182a150f20475f0887cbc261d9be01dbfd98.zip |
Protect language converter markup in the preprocessor (take 2).
This revises 28774022769d2273be16c6c6e1cca710a1fd97ef, which was
reverted in master due to unexpected issues with `-{{...}} ` markup
on translatewiki and enwiki. Test cases are added to ensure that this
is parsed as a template, not as language converter markup.
https://www.mediawiki.org/wiki/Preprocessor_ABNF is the canonical
documentation for the preprocessor; this will be updated after this
patch is merged. The basic principles described in that page are
maintained in this patch:
* Rightmost opening structure has precedence: `-{{` is parsed as a
dash followed by template opening.
* `{{{` has precedence over `{{` and `-{`: `-{{{{` is parsed as
`-{` `{{{` since we first grab the rightmost `{{{`.
A bunch of test cases were added to verify the "ideal precedence"
order described on that wiki page.
This patch introduced some minor incompatibilities in existing
markup, in particular with chemical formulae in templates.
Fixes for these are being tracked at
https://www.mediawiki.org/wiki/Parsoid/Language_conversion/Preprocessor_fixups
Bug: T146304
Bug: T153761
Change-Id: I2f0c186c75e392c95e1a3d89266cae2586349150
Diffstat (limited to 'includes/parser/Preprocessor_Hash.php')
-rw-r--r-- | includes/parser/Preprocessor_Hash.php | 63 |
1 files changed, 48 insertions, 15 deletions
diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php index b2e9531ddd3c..597d1f231cce 100644 --- a/includes/parser/Preprocessor_Hash.php +++ b/includes/parser/Preprocessor_Hash.php @@ -155,8 +155,7 @@ class Preprocessor_Hash extends Preprocessor { $searchBase = "[{<\n"; if ( !$wgDisableLangConversion ) { - // FIXME: disabled due to T153761 - // $searchBase .= '-'; + $searchBase .= '-'; } // For fast reverse searches @@ -208,6 +207,13 @@ class Preprocessor_Hash extends Preprocessor { $search = $searchBase; if ( $stack->top === false ) { $currentClosing = ''; + } elseif ( + $stack->top->close === '}-' && + $stack->top->count > 2 + ) { + # adjust closing for -{{{...{{ + $currentClosing = '}'; + $search .= $currentClosing; } else { $currentClosing = $stack->top->close; $search .= $currentClosing; @@ -264,11 +270,15 @@ class Preprocessor_Hash extends Preprocessor { } elseif ( isset( $this->rules[$curChar] ) ) { $found = 'open'; $rule = $this->rules[$curChar]; - } elseif ( $curChar == '-' ) { - $found = 'dash'; } else { - # Some versions of PHP have a strcspn which stops on null characters - # Ignore and continue + # Some versions of PHP have a strcspn which stops on + # null characters; ignore these and continue. + # We also may get '-' and '}' characters here which + # don't match -{ or $currentClosing. Add these to + # output and continue. + if ( $curChar == '-' || $curChar == '}' ) { + self::addLiteral( $accum, $curChar ); + } ++$i; continue; } @@ -558,7 +568,10 @@ class Preprocessor_Hash extends Preprocessor { } elseif ( $found == 'open' ) { # count opening brace characters $curLen = strlen( $curChar ); - $count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i ); + $count = ( $curLen > 1 ) ? + # allow the final character to repeat + strspn( $text, $curChar[$curLen-1], $i+1 ) + 1 : + strspn( $text, $curChar, $i ); # we need to add to stack only if opening brace count is enough for one of the rules if ( $count >= $rule['min'] ) { @@ -577,17 +590,25 @@ class Preprocessor_Hash extends Preprocessor { # Add literal brace(s) self::addLiteral( $accum, str_repeat( $curChar, $count ) ); } - $i += $curLen * $count; + $i += $count; } elseif ( $found == 'close' ) { $piece = $stack->top; # lets check if there are enough characters for closing brace $maxCount = $piece->count; + if ( $piece->close === '}-' && $curChar === '}' ) { + $maxCount--; # don't try to match closing '-' as a '}' + } $curLen = strlen( $curChar ); - $count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i, $maxCount ); + $count = ( $curLen > 1 ) ? $curLen : + strspn( $text, $curChar, $i, $maxCount ); # check for maximum matching characters (if there are 5 closing # characters, we will probably need only 3 - depending on the rules) $rule = $this->rules[$piece->open]; + if ( $piece->close === '}-' && $piece->count > 2 ) { + # tweak for -{..{{ }}..}- + $rule = $this->rules['{']; + } if ( $count > $rule['max'] ) { # The specified maximum exists in the callback array, unless the caller # has made an error @@ -605,15 +626,17 @@ class Preprocessor_Hash extends Preprocessor { if ( $matchingCount <= 0 ) { # No matching element found in callback array # Output a literal closing brace and continue - self::addLiteral( $accum, str_repeat( $curChar, $count ) ); - $i += $curLen * $count; + $endText = substr( $text, $i, $count ); + self::addLiteral( $accum, $endText ); + $i += $count; continue; } $name = $rule['names'][$matchingCount]; if ( $name === null ) { // No element, just literal text + $endText = substr( $text, $i, $matchingCount ); $element = $piece->breakSyntax( $matchingCount ); - self::addLiteral( $element, str_repeat( $rule['end'], $matchingCount ) ); + self::addLiteral( $element, $endText ); } else { # Create XML element $parts = $piece->parts; @@ -648,7 +671,7 @@ class Preprocessor_Hash extends Preprocessor { } # Advance input pointer - $i += $curLen * $matchingCount; + $i += $matchingCount; # Unwind the stack $stack->pop(); @@ -664,7 +687,12 @@ class Preprocessor_Hash extends Preprocessor { $stack->push( $piece ); $accum =& $stack->getAccum(); } else { - self::addLiteral( $accum, str_repeat( $piece->open, $piece->count ) ); + $s = substr( $piece->open, 0, -1 ); + $s .= str_repeat( + substr( $piece->open, -1 ), + $piece->count - strlen( $s ) + ); + self::addLiteral( $accum, $s ); } } @@ -762,7 +790,12 @@ class PPDStackElement_Hash extends PPDStackElement { if ( $openingCount === false ) { $openingCount = $this->count; } - $accum = [ str_repeat( $this->open, $openingCount ) ]; + $s = substr( $this->open, 0, -1 ); + $s .= str_repeat( + substr( $this->open, -1 ), + $openingCount - strlen( $s ) + ); + $accum = [ $s ]; $lastIndex = 0; $first = true; foreach ( $this->parts as $part ) { |