aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--includes/AutoLoader.php3
-rw-r--r--includes/parser/LinkHolderArray.php90
-rw-r--r--includes/parser/Parser.php269
-rw-r--r--includes/parser/Preprocessor_DOM.php2
-rw-r--r--includes/parser/Preprocessor_Hash.php2
-rw-r--r--includes/parser/StripState.php138
6 files changed, 330 insertions, 174 deletions
diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php
index 019c94dfbff6..4b7c34642d25 100644
--- a/includes/AutoLoader.php
+++ b/includes/AutoLoader.php
@@ -518,7 +518,6 @@ $wgAutoloadLocalClasses = array(
'DateFormatter' => 'includes/parser/DateFormatter.php',
'LinkHolderArray' => 'includes/parser/LinkHolderArray.php',
'LinkMarkerReplacer' => 'includes/parser/Parser_LinkHooks.php',
- 'OnlyIncludeReplacer' => 'includes/parser/Parser.php',
'PPCustomFrame_Hash' => 'includes/parser/Preprocessor_Hash.php',
'PPCustomFrame_DOM' => 'includes/parser/Preprocessor_DOM.php',
'PPDAccum_Hash' => 'includes/parser/Preprocessor_Hash.php',
@@ -548,7 +547,7 @@ $wgAutoloadLocalClasses = array(
'Preprocessor' => 'includes/parser/Preprocessor.php',
'Preprocessor_DOM' => 'includes/parser/Preprocessor_DOM.php',
'Preprocessor_Hash' => 'includes/parser/Preprocessor_Hash.php',
- 'StripState' => 'includes/parser/Parser.php',
+ 'StripState' => 'includes/parser/StripState.php',
'MWTidy' => 'includes/parser/Tidy.php',
# includes/search
diff --git a/includes/parser/LinkHolderArray.php b/includes/parser/LinkHolderArray.php
index 31e121f42762..1a76fa867c25 100644
--- a/includes/parser/LinkHolderArray.php
+++ b/includes/parser/LinkHolderArray.php
@@ -12,6 +12,7 @@ class LinkHolderArray {
var $internals = array(), $interwikis = array();
var $size = 0;
var $parent;
+ protected $tempIdOffset;
function __construct( $parent ) {
$this->parent = $parent;
@@ -27,6 +28,15 @@ class LinkHolderArray {
}
/**
+ * Don't serialize the parent object, it is big, and not needed when it is
+ * a parameter to mergeForeign(), which is the only application of
+ * serializing at present.
+ */
+ function __sleep() {
+ return array( 'internals', 'interwikis', 'size' );
+ }
+
+ /**
* Merge another LinkHolderArray into this one
* @param $other LinkHolderArray
*/
@@ -43,6 +53,86 @@ class LinkHolderArray {
}
/**
+ * Merge a LinkHolderArray from another parser instance into this one. The
+ * keys will not be preserved. Any text which went with the old
+ * LinkHolderArray and needs to work with the new one should be passed in
+ * the $texts array. The strings in this array will have their link holders
+ * converted for use in the destination link holder. The resulting array of
+ * strings will be returned.
+ *
+ * @param $other LinkHolderArray
+ * @param $text Array of strings
+ * @return Array
+ */
+ function mergeForeign( $other, $texts ) {
+ $this->tempIdOffset = $idOffset = $this->parent->nextLinkID();
+ $maxId = 0;
+
+ # Renumber internal links
+ foreach ( $other->internals as $ns => $nsLinks ) {
+ foreach ( $nsLinks as $key => $entry ) {
+ $newKey = $idOffset + $key;
+ $this->internals[$ns][$newKey] = $entry;
+ $maxId = $newKey > $maxId ? $newKey : $maxId;
+ }
+ }
+ $texts = preg_replace_callback( '(<!--LINK \d+:)(\d+)(-->)',
+ array( $this, 'mergeForeignCallback' ), $texts );
+
+ # Renumber interwiki links
+ foreach ( $links['interwiki'] as $key => $entry ) {
+ $newKey = $idOffset + $key;
+ $this->interwikis[$newKey] = $entry;
+ $maxId = $newKey > $maxId ? $newKey : $maxId;
+
+ }
+ $texts = preg_replace_callback( '(<!--IWLINK )(\d+)(-->)',
+ array( $this, 'mergeForeignCallback' ), $texts );
+
+ # Set the parent link ID to be the highest used ID
+ $this->parent->setLinkID( $maxId );
+ $this->tempIdOffset = null;
+ }
+
+ protected function mergeForeignCallback( $m ) {
+ return $m[1] . ( $m[2] + $this->tempIdOffset ) . $m[3];
+ }
+
+ /**
+ * Get a subset of the current LinkHolderArray which is sufficient to
+ * interpret the given text.
+ */
+ function getSubArray( $text ) {
+ $sub = new LinkHolderArray( $this->parent );
+
+ # Internal links
+ $pos = 0;
+ while ( $pos < strlen( $text ) ) {
+ if ( !preg_match( '/<!--LINK (\d+):(\d+)-->/',
+ $text, $m, PREG_OFFSET_CAPTURE, $pos ) )
+ {
+ break;
+ }
+ $ns = $m[1][0];
+ $key = $m[2][0];
+ $sub->internals[$ns][$key] = $this->internals[$ns][$key];
+ $pos = $m[0][1] + strlen( $m[0][0] );
+ }
+
+ # Interwiki links
+ $pos = 0;
+ while ( $pos < strlen( $text ) ) {
+ if ( !preg_match( '/<!--IWLINK (\d+)-->/', $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) {
+ break;
+ }
+ $key = $m[1][0];
+ $sub->interwikis[$key] = $this->interwikis[$key];
+ $pos = $m[0][1] + strlen( $m[0][0] );
+ }
+ return $sub;
+ }
+
+ /**
* Returns true if the memory requirements of this object are getting large
*/
function isBig() {
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 8bbeb2db2e10..afa39345a186 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -55,6 +55,12 @@ class Parser {
*/
const VERSION = '1.6.4';
+ /**
+ * Update this version number when the output of serialiseHalfParsedText()
+ * changes in an incompatible way
+ */
+ const HALF_PARSED_VERSION = 2;
+
# Flags for Parser::setFunctionHook
# Also available as global constants from Defines.php
const SFH_NO_HASH = 1;
@@ -203,7 +209,6 @@ class Parser {
$this->mLastSection = '';
$this->mDTopen = false;
$this->mIncludeCount = array();
- $this->mStripState = new StripState;
$this->mArgStack = false;
$this->mInPre = false;
$this->mLinkHolders = new LinkHolderArray( $this );
@@ -226,6 +231,7 @@ class Parser {
# $this->mUniqPrefix = "\x07UNIQ" . Parser::getRandomString();
# Changed to \x7f to allow XML double-parsing -- TS
$this->mUniqPrefix = "\x7fUNIQ" . self::getRandomString();
+ $this->mStripState = new StripState( $this->mUniqPrefix );
# Clear these on every parse, bug 4549
@@ -353,23 +359,7 @@ class Parser {
wfRunHooks( 'ParserBeforeTidy', array( &$this, &$text ) );
-//!JF Move to its own function
-
- $uniq_prefix = $this->mUniqPrefix;
- $matches = array();
- $elements = array_keys( $this->mTransparentTagHooks );
- $text = $this->extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );
-
- foreach ( $matches as $marker => $data ) {
- list( $element, $content, $params, $tag ) = $data;
- $tagName = strtolower( $element );
- if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
- $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
- } else {
- $output = $tag;
- }
- $this->mStripState->general->setPair( $marker, $output );
- }
+ $text = $this->replaceTransparentTags( $text );
$text = $this->mStripState->unstripGeneral( $text );
$text = Sanitizer::normalizeCharReferences( $text );
@@ -620,6 +610,10 @@ class Parser {
return $this->mLinkID++;
}
+ function setLinkID( $id ) {
+ $this->mLinkID = $id;
+ }
+
/**
* @return Language
*/
@@ -793,7 +787,7 @@ class Parser {
function insertStripItem( $text ) {
$rnd = "{$this->mUniqPrefix}-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
$this->mMarkerIndex++;
- $this->mStripState->general->setPair( $rnd, $text );
+ $this->mStripState->addGeneral( $rnd, $text );
return $rnd;
}
@@ -3542,9 +3536,9 @@ class Parser {
if ( $markerType === 'none' ) {
return $output;
} elseif ( $markerType === 'nowiki' ) {
- $this->mStripState->nowiki->setPair( $marker, $output );
+ $this->mStripState->addNoWiki( $marker, $output );
} elseif ( $markerType === 'general' ) {
- $this->mStripState->general->setPair( $marker, $output );
+ $this->mStripState->addGeneral( $marker, $output );
} else {
throw new MWException( __METHOD__.': invalid marker type' );
}
@@ -4859,6 +4853,30 @@ class Parser {
}
/**
+ * Replace transparent tags in $text with the values given by the callbacks.
+ *
+ * Transparent tag hooks are like regular XML-style tag hooks, except they
+ * operate late in the transformation sequence, on HTML instead of wikitext.
+ */
+ function replaceTransparentTags( $text ) {
+ $matches = array();
+ $elements = array_keys( $this->mTransparentTagHooks );
+ $text = $this->extractTagsAndParams( $elements, $text, $matches, $this->mUniqPrefix );
+
+ foreach ( $matches as $marker => $data ) {
+ list( $element, $content, $params, $tag ) = $data;
+ $tagName = strtolower( $element );
+ if ( isset( $this->mTransparentTagHooks[$tagName] ) ) {
+ $output = call_user_func_array( $this->mTransparentTagHooks[$tagName], array( $content, $params, $this ) );
+ } else {
+ $output = $tag;
+ }
+ $this->mStripState->addGeneral( $marker, $output );
+ }
+ return $text;
+ }
+
+ /**
* Break wikitext input into sections, and either pull or replace
* some particular section's text.
*
@@ -5203,6 +5221,17 @@ class Parser {
return $this->testSrvus( $text, $title, $options, self::OT_PREPROCESS );
}
+ /**
+ * Call a callback function on all regions of the given text that are not
+ * inside strip markers, and replace those regions with the return value
+ * of the callback. For example, with input:
+ *
+ * aaa<MARKER>bbb
+ *
+ * This will call the callback function twice, with 'aaa' and 'bbb'. Those
+ * two strings will be replaced with the value returned by the callback in
+ * each case.
+ */
function markerSkipCallback( $s, $callback ) {
$i = 0;
$out = '';
@@ -5227,168 +5256,68 @@ class Parser {
return $out;
}
- function serialiseHalfParsedText( $text ) {
- $data = array();
- $data['text'] = $text;
-
- # First, find all strip markers, and store their
- # data in an array.
- $stripState = new StripState;
- $pos = 0;
- while ( ( $start_pos = strpos( $text, $this->mUniqPrefix, $pos ) )
- && ( $end_pos = strpos( $text, self::MARKER_SUFFIX, $pos ) ) )
- {
- $end_pos += strlen( self::MARKER_SUFFIX );
- $marker = substr( $text, $start_pos, $end_pos-$start_pos );
-
- if ( !empty( $this->mStripState->general->data[$marker] ) ) {
- $replaceArray = $stripState->general;
- $stripText = $this->mStripState->general->data[$marker];
- } elseif ( !empty( $this->mStripState->nowiki->data[$marker] ) ) {
- $replaceArray = $stripState->nowiki;
- $stripText = $this->mStripState->nowiki->data[$marker];
- } else {
- throw new MWException( "Hanging strip marker: '$marker'." );
- }
-
- $replaceArray->setPair( $marker, $stripText );
- $pos = $end_pos;
- }
- $data['stripstate'] = $stripState;
-
- # Now, find all of our links, and store THEIR
- # data in an array! :)
- $links = array( 'internal' => array(), 'interwiki' => array() );
- $pos = 0;
-
- # Internal links
- while ( ( $start_pos = strpos( $text, '<!--LINK ', $pos ) ) ) {
- list( $ns, $trail ) = explode( ':', substr( $text, $start_pos + strlen( '<!--LINK ' ) ), 2 );
-
- $ns = trim( $ns );
- if ( empty( $links['internal'][$ns] ) ) {
- $links['internal'][$ns] = array();
- }
-
- $key = trim( substr( $trail, 0, strpos( $trail, '-->' ) ) );
- $links['internal'][$ns][] = $this->mLinkHolders->internals[$ns][$key];
- $pos = $start_pos + strlen( "<!--LINK $ns:$key-->" );
- }
-
- $pos = 0;
-
- # Interwiki links
- while ( ( $start_pos = strpos( $text, '<!--IWLINK ', $pos ) ) ) {
- $data = substr( $text, $start_pos );
- $key = trim( substr( $data, 0, strpos( $data, '-->' ) ) );
- $links['interwiki'][] = $this->mLinkHolders->interwiki[$key];
- $pos = $start_pos + strlen( "<!--IWLINK $key-->" );
- }
-
- $data['linkholder'] = $links;
-
+ /**
+ * Save the parser state required to convert the given half-parsed text to
+ * HTML. "Half-parsed" in this context means the output of
+ * recursiveTagParse() or internalParse(). This output has strip markers
+ * from replaceVariables (extensionSubstitution() etc.), and link
+ * placeholders from replaceLinkHolders().
+ *
+ * Returns an array which can be serialized and stored persistently. This
+ * array can later be loaded into another parser instance with
+ * unserializeHalfParsed(). The text can then be safely incorporated into
+ * the return value of a parser hook.
+ */
+ function serializeHalfParsedText( $text ) {
+ wfProfileIn( __METHOD__ );
+ $data = array(
+ 'text' => $text,
+ 'version' => self::HALF_PARSED_VERSION,
+ 'stripState' => $this->mStripState->getSubState( $text ),
+ 'linkHolders' => $this->mLinkHolders->getSubArray( $text )
+ );
+ wfProfileOut( __METHOD__ );
return $data;
}
/**
- * TODO: document
- * @param $data Array
- * @param $intPrefix String unique identifying prefix
+ * Load the parser state given in the $data array, which is assumed to
+ * have been generated by serializeHalfParsedText(). The text contents is
+ * extracted from the array, and its markers are transformed into markers
+ * appropriate for the current Parser instance. This transformed text is
+ * returned, and can be safely included in the return value of a parser
+ * hook.
+ *
+ * If the $data array has been stored persistently, the caller should first
+ * check whether it is still valid, by calling isValidHalfParsedData().
+ *
+ * @param $data Serialized data
* @return String
*/
- function unserialiseHalfParsedText( $data, $intPrefix = null ) {
- if ( !$intPrefix ) {
- $intPrefix = self::getRandomString();
+ function unserializeHalfParsedText( $data ) {
+ if ( !isset( $data['version'] ) || $data['version'] != self::HALF_PARSED_VERSION ) {
+ throw new MWException( __METHOD__.': invalid version' );
}
# First, extract the strip state.
- $stripState = $data['stripstate'];
- $this->mStripState->general->merge( $stripState->general );
- $this->mStripState->nowiki->merge( $stripState->nowiki );
-
- # Now, extract the text, and renumber links
- $text = $data['text'];
- $links = $data['linkholder'];
-
- # Internal...
- foreach ( $links['internal'] as $ns => $nsLinks ) {
- foreach ( $nsLinks as $key => $entry ) {
- $newKey = $intPrefix . '-' . $key;
- $this->mLinkHolders->internals[$ns][$newKey] = $entry;
+ $texts = array( $data['text'] );
+ $texts = $this->mStripState->merge( $data['stripState'], $texts );
- $text = str_replace( "<!--LINK $ns:$key-->", "<!--LINK $ns:$newKey-->", $text );
- }
- }
-
- # Interwiki...
- foreach ( $links['interwiki'] as $key => $entry ) {
- $newKey = "$intPrefix-$key";
- $this->mLinkHolders->interwikis[$newKey] = $entry;
-
- $text = str_replace( "<!--IWLINK $key-->", "<!--IWLINK $newKey-->", $text );
- }
+ # Now renumber links
+ $texts = $this->mLinkHolders->mergeForeign( $data['linkHolders'], $texts );
# Should be good to go.
- return $text;
- }
-}
-
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class StripState {
- var $general, $nowiki;
-
- function __construct() {
- $this->general = new ReplacementArray;
- $this->nowiki = new ReplacementArray;
+ return $texts[0];
}
- function unstripGeneral( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->general->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-
- function unstripNoWiki( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->nowiki->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-
- function unstripBoth( $text ) {
- wfProfileIn( __METHOD__ );
- do {
- $oldText = $text;
- $text = $this->general->replace( $text );
- $text = $this->nowiki->replace( $text );
- } while ( $text !== $oldText );
- wfProfileOut( __METHOD__ );
- return $text;
- }
-}
-
-/**
- * @todo document, briefly.
- * @ingroup Parser
- */
-class OnlyIncludeReplacer {
- var $output = '';
-
- function replace( $matches ) {
- if ( substr( $matches[1], -1 ) === "\n" ) {
- $this->output .= substr( $matches[1], 0, -1 );
- } else {
- $this->output .= $matches[1];
- }
+ /**
+ * Returns true if the given array, presumed to be generated by
+ * serializeHalfParsedText(), is compatible with the current version of the
+ * parser.
+ *
+ * @param $data Array.
+ */
+ function isValidHalfParsedData( $data ) {
+ return isset( $data['version'] ) && $data['version'] == self::HALF_PARSED_VERSION;
}
}
diff --git a/includes/parser/Preprocessor_DOM.php b/includes/parser/Preprocessor_DOM.php
index 3dfd3e11ed0b..0ba580575eaa 100644
--- a/includes/parser/Preprocessor_DOM.php
+++ b/includes/parser/Preprocessor_DOM.php
@@ -1070,7 +1070,7 @@ class PPFrame_DOM implements PPFrame {
$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
$count = $contextNode->getAttribute( 'level' );
$s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
- $this->parser->mStripState->general->setPair( $marker, '' );
+ $this->parser->mStripState->addGeneral( $marker, '' );
}
$out .= $s;
} else {
diff --git a/includes/parser/Preprocessor_Hash.php b/includes/parser/Preprocessor_Hash.php
index 45af9ee15654..8a51a6b96cd7 100644
--- a/includes/parser/Preprocessor_Hash.php
+++ b/includes/parser/Preprocessor_Hash.php
@@ -1018,7 +1018,7 @@ class PPFrame_Hash implements PPFrame {
$serial = count( $this->parser->mHeadings ) - 1;
$marker = "{$this->parser->mUniqPrefix}-h-$serial-" . Parser::MARKER_SUFFIX;
$s = substr( $s, 0, $bits['level'] ) . $marker . substr( $s, $bits['level'] );
- $this->parser->mStripState->general->setPair( $marker, '' );
+ $this->parser->mStripState->setGeneral( $marker, '' );
$out .= $s;
} else {
# Expand in virtual stack
diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php
new file mode 100644
index 000000000000..0bf9e17acee5
--- /dev/null
+++ b/includes/parser/StripState.php
@@ -0,0 +1,138 @@
+<?php
+
+/**
+ * @todo document, briefly.
+ * @ingroup Parser
+ */
+class StripState {
+ protected $prefix;
+ protected $data;
+ protected $regex;
+
+ protected $tempType, $tempMergePrefix;
+
+ function __construct( $prefix ) {
+ $this->prefix = $prefix;
+ $this->data = array(
+ 'nowiki' => array(),
+ 'general' => array()
+ );
+ $this->regex = "/{$this->prefix}([^\x7f]+)" . Parser::MARKER_SUFFIX . '/';
+ }
+
+ /**
+ * Add a nowiki strip item
+ */
+ function addNoWiki( $marker, $value ) {
+ $this->addItem( 'nowiki', $marker, $value );
+ }
+
+ function addGeneral( $marker, $value ) {
+ $this->addItem( 'general', $marker, $value );
+ }
+
+ protected function addItem( $type, $marker, $value ) {
+ if ( !preg_match( $this->regex, $marker, $m ) ) {
+ throw new MWException( "Invalid marker: $marker" );
+ }
+
+ $this->data[$type][$m[1]] = $value;
+ }
+
+ function unstripGeneral( $text ) {
+ return $this->unstripType( 'general', $text );
+ }
+
+ function unstripNoWiki( $text ) {
+ return $this->unstripType( 'nowiki', $text );
+ }
+
+ function unstripBoth( $text ) {
+ $text = $this->unstripType( 'general', $text );
+ $text = $this->unstripType( 'nowiki', $text );
+ return $text;
+ }
+
+ protected function unstripType( $type, $text ) {
+ // Shortcut
+ if ( !count( $this->data[$type] ) ) {
+ return $text;
+ }
+
+ wfProfileIn( __METHOD__ );
+ $this->tempType = $type;
+ $out = preg_replace_callback( $this->regex, array( $this, 'unstripCallback' ), $text );
+ $this->tempType = null;
+ wfProfileOut( __METHOD__ );
+ return $out;
+ }
+
+ protected function unstripCallback( $m ) {
+ if ( isset( $this->data[$this->tempType][$m[1]] ) ) {
+ return $this->data[$this->tempType][$m[1]];
+ } else {
+ return $m[0];
+ }
+ }
+
+ /**
+ * Get a StripState object which is sufficient to unstrip the given text.
+ * It will contain the minimum subset of strip items necessary.
+ */
+ function getSubState( $text ) {
+ $subState = new StripState( $this->prefix );
+ $pos = 0;
+ while ( true ) {
+ $startPos = strpos( $text, $this->prefix, $pos );
+ $endPos = strpos( $text, Parser::MARKER_SUFFIX, $pos );
+ if ( $startPos === false || $endPos === false ) {
+ break;
+ }
+
+ $endPos += strlen( Parser::MARKER_SUFFIX );
+ $marker = substr( $text, $startPos, $endPos - $startPos );
+ if ( !preg_match( $this->regex, $marker, $m ) ) {
+ continue;
+ }
+
+ $key = $m[1];
+ if ( isset( $this->data['nowiki'][$key] ) ) {
+ $subState->data['nowiki'][$key] = $this->data['nowiki'][$key];
+ } elseif ( isset( $this->data['general'][$key] ) ) {
+ $subState->data['general'][$key] = $this->data['general'][$key];
+ }
+ $pos = $endPos;
+ }
+ return $subState;
+ }
+
+ /**
+ * Merge another StripState object into this one. The strip marker keys
+ * will not be preserved. The strings in the $texts array will have their
+ * strip markers rewritten, the resulting array of strings will be returned.
+ *
+ * @param $otherState StripState
+ * @param $texts Array
+ * @return Array
+ */
+ function merge( $otherState, $texts ) {
+ $mergePrefix = Parser::getRandomString();
+
+ foreach ( $otherState->data as $type => $items ) {
+ foreach ( $items as $key => $value ) {
+ $this->data[$type]["$mergePrefix-$key"] = $value;
+ }
+ }
+
+ $this->tempMergePrefix = $mergePrefix;
+ $texts = preg_replace_callback( $otherState->regex, array( $this, 'mergeCallback' ), $texts );
+ $this->tempMergePrefix = null;
+ return $texts;
+ }
+
+ protected function mergeCallback( $m ) {
+ $key = $m[1];
+ return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX;
+ }
+}
+