aboutsummaryrefslogtreecommitdiffstats
path: root/includes/parser/Preprocessor_Hash.php
diff options
context:
space:
mode:
author: C. Scott Ananian <cscott@cscott.net> 2017-01-19 14:58:05 -0500
committer: Reedy <reedy@wikimedia.org> 2017-05-23 15:43:49 +0100
commit: 186a182a150f20475f0887cbc261d9be01dbfd98 (patch)
tree: af33749287184af6a8b6d203cf6da804c84b8aaf /includes/parser/Preprocessor_Hash.php
parent: 1928a85867dcbdb035d2cb2432efb91353826fba (diff)
download: mediawikicore-186a182a150f20475f0887cbc261d9be01dbfd98.tar.gz
download: mediawikicore-186a182a150f20475f0887cbc261d9be01dbfd98.zip
Protect language converter markup in the preprocessor (take 2).
This revises 28774022769d2273be16c6c6e1cca710a1fd97ef, which was reverted in master due to unexpected issues with `-{{...}} ` markup on translatewiki and enwiki. Test cases are added to ensure that this is parsed as a template, not as language converter markup.

https://www.mediawiki.org/wiki/Preprocessor_ABNF is the canonical documentation for the preprocessor; this will be updated after this patch is merged.

The basic principles described on that page are maintained in this patch:
* Rightmost opening structure has precedence: `-{{` is parsed as a dash followed by template opening.
* `{{{` has precedence over `{{` and `-{`: `-{{{{` is parsed as `-{` `{{{` since we first grab the rightmost `{{{`.

A bunch of test cases were added to verify the "ideal precedence" order described on that wiki page.

This patch introduced some minor incompatibilities in existing markup, in particular with chemical formulae in templates. Fixes for these are being tracked at https://www.mediawiki.org/wiki/Parsoid/Language_conversion/Preprocessor_fixups

Bug: T146304
Bug: T153761
Change-Id: I2f0c186c75e392c95e1a3d89266cae2586349150
Diffstat (limited to 'includes/parser/Preprocessor_Hash.php')
-rw-r--r-- includes/parser/Preprocessor_Hash.php 63
1 file changed, 48 insertions, 15 deletions
diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php
index b2e9531ddd3c..597d1f231cce 100644
--- a/includes/parser/Preprocessor_Hash.php
+++ b/includes/parser/Preprocessor_Hash.php
@@ -155,8 +155,7 @@ class Preprocessor_Hash extends Preprocessor {
$searchBase = "[{<\n";
if ( !$wgDisableLangConversion ) {
- // FIXME: disabled due to T153761
- // $searchBase .= '-';
+ $searchBase .= '-';
}
// For fast reverse searches
@@ -208,6 +207,13 @@ class Preprocessor_Hash extends Preprocessor {
$search = $searchBase;
if ( $stack->top === false ) {
$currentClosing = '';
+ } elseif (
+ $stack->top->close === '}-' &&
+ $stack->top->count > 2
+ ) {
+ # adjust closing for -{{{...{{
+ $currentClosing = '}';
+ $search .= $currentClosing;
} else {
$currentClosing = $stack->top->close;
$search .= $currentClosing;
@@ -264,11 +270,15 @@ class Preprocessor_Hash extends Preprocessor {
} elseif ( isset( $this->rules[$curChar] ) ) {
$found = 'open';
$rule = $this->rules[$curChar];
- } elseif ( $curChar == '-' ) {
- $found = 'dash';
} else {
- # Some versions of PHP have a strcspn which stops on null characters
- # Ignore and continue
+ # Some versions of PHP have a strcspn which stops on
+ # null characters; ignore these and continue.
+ # We also may get '-' and '}' characters here which
+ # don't match -{ or $currentClosing. Add these to
+ # output and continue.
+ if ( $curChar == '-' || $curChar == '}' ) {
+ self::addLiteral( $accum, $curChar );
+ }
++$i;
continue;
}
@@ -558,7 +568,10 @@ class Preprocessor_Hash extends Preprocessor {
} elseif ( $found == 'open' ) {
# count opening brace characters
$curLen = strlen( $curChar );
- $count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i );
+ $count = ( $curLen > 1 ) ?
+ # allow the final character to repeat
+ strspn( $text, $curChar[$curLen-1], $i+1 ) + 1 :
+ strspn( $text, $curChar, $i );
# we need to add to stack only if opening brace count is enough for one of the rules
if ( $count >= $rule['min'] ) {
@@ -577,17 +590,25 @@ class Preprocessor_Hash extends Preprocessor {
# Add literal brace(s)
self::addLiteral( $accum, str_repeat( $curChar, $count ) );
}
- $i += $curLen * $count;
+ $i += $count;
} elseif ( $found == 'close' ) {
$piece = $stack->top;
# lets check if there are enough characters for closing brace
$maxCount = $piece->count;
+ if ( $piece->close === '}-' && $curChar === '}' ) {
+ $maxCount--; # don't try to match closing '-' as a '}'
+ }
$curLen = strlen( $curChar );
- $count = ( $curLen > 1 ) ? 1 : strspn( $text, $curChar, $i, $maxCount );
+ $count = ( $curLen > 1 ) ? $curLen :
+ strspn( $text, $curChar, $i, $maxCount );
# check for maximum matching characters (if there are 5 closing
# characters, we will probably need only 3 - depending on the rules)
$rule = $this->rules[$piece->open];
+ if ( $piece->close === '}-' && $piece->count > 2 ) {
+ # tweak for -{..{{ }}..}-
+ $rule = $this->rules['{'];
+ }
if ( $count > $rule['max'] ) {
# The specified maximum exists in the callback array, unless the caller
# has made an error
@@ -605,15 +626,17 @@ class Preprocessor_Hash extends Preprocessor {
if ( $matchingCount <= 0 ) {
# No matching element found in callback array
# Output a literal closing brace and continue
- self::addLiteral( $accum, str_repeat( $curChar, $count ) );
- $i += $curLen * $count;
+ $endText = substr( $text, $i, $count );
+ self::addLiteral( $accum, $endText );
+ $i += $count;
continue;
}
$name = $rule['names'][$matchingCount];
if ( $name === null ) {
// No element, just literal text
+ $endText = substr( $text, $i, $matchingCount );
$element = $piece->breakSyntax( $matchingCount );
- self::addLiteral( $element, str_repeat( $rule['end'], $matchingCount ) );
+ self::addLiteral( $element, $endText );
} else {
# Create XML element
$parts = $piece->parts;
@@ -648,7 +671,7 @@ class Preprocessor_Hash extends Preprocessor {
}
# Advance input pointer
- $i += $curLen * $matchingCount;
+ $i += $matchingCount;
# Unwind the stack
$stack->pop();
@@ -664,7 +687,12 @@ class Preprocessor_Hash extends Preprocessor {
$stack->push( $piece );
$accum =& $stack->getAccum();
} else {
- self::addLiteral( $accum, str_repeat( $piece->open, $piece->count ) );
+ $s = substr( $piece->open, 0, -1 );
+ $s .= str_repeat(
+ substr( $piece->open, -1 ),
+ $piece->count - strlen( $s )
+ );
+ self::addLiteral( $accum, $s );
}
}
@@ -762,7 +790,12 @@ class PPDStackElement_Hash extends PPDStackElement {
if ( $openingCount === false ) {
$openingCount = $this->count;
}
- $accum = [ str_repeat( $this->open, $openingCount ) ];
+ $s = substr( $this->open, 0, -1 );
+ $s .= str_repeat(
+ substr( $this->open, -1 ),
+ $openingCount - strlen( $s )
+ );
+ $accum = [ $s ];
$lastIndex = 0;
$first = true;
foreach ( $this->parts as $part ) {