aboutsummaryrefslogtreecommitdiffstats
path: root/includes/parser/StripState.php
diff options
context:
space:
mode:
author: Tim Starling <tstarling@users.mediawiki.org> 2011-02-23 06:58:15 +0000
committer: Tim Starling <tstarling@users.mediawiki.org> 2011-02-23 06:58:15 +0000
commit: a20350dd319bf7dd49f9c69e57bf5ad3614b533c (patch)
tree: fbeaa57db6855d000cf346a2885071297e27cdf3 /includes/parser/StripState.php
parent: f17cf7ef0eff66f5286253eebe47d675769db7da (diff)
download: mediawikicore-a20350dd319bf7dd49f9c69e57bf5ad3614b533c.tar.gz
mediawikicore-a20350dd319bf7dd49f9c69e57bf5ad3614b533c.zip
* Rewrote StripState to not use ReplacementArray. The memory usage of FSS was excessive when there were many (>10k) strip items. I used preg_replace_callback(), which is slower than strtr() in the simplest case, but much faster than it when the markers have different lengths, which they usually do.
* It was not necessary to preserve the $stripState->general->setPair() interface since it wasn't used by any extensions.
* Moved StripState to its own file.
* Refactored serialiseHalfParsedText() and unserialiseHalfParsedText() so that the bulk of the functionality is in the relevant modules, instead of using scary direct access to object member variables. Made it support the new StripState. It seemed like a lot of work to go to in order to support an "emergency optimisation" feature in Cite. Cite updates will be in a subsequent commit.
* Fixed spelling of serialiseHalfParsedText() and unserialiseHalfParsedText(); there is unavoidable interface breakage anyway, due to cache object versioning.
* Moved transparent tags to their own function, as requested in a fixme comment.
* Added documentation for markerSkipCallback().
* Removed OnlyIncludeReplacer, unused since MW 1.12.
Notes
Notes: http://mediawiki.org/wiki/Special:Code/MediaWiki/82645
Diffstat (limited to 'includes/parser/StripState.php')
-rw-r--r-- includes/parser/StripState.php 138
1 files changed, 138 insertions, 0 deletions
diff --git a/includes/parser/StripState.php b/includes/parser/StripState.php
new file mode 100644
index 000000000000..0bf9e17acee5
--- /dev/null
+++ b/includes/parser/StripState.php
@@ -0,0 +1,138 @@
+<?php
+
+/**
+ * Holds strip items and substitutes them back into text.
+ *
+ * Items are stored in two categories, 'nowiki' and 'general'. Within a
+ * category each item is keyed by the part of its strip marker that lies
+ * between the marker prefix and Parser::MARKER_SUFFIX.
+ *
+ * @ingroup Parser
+ */
+class StripState {
+	/** Marker prefix given to the constructor */
+	protected $prefix;
+	/** Array of type ('nowiki' or 'general') => array( marker key => value ) */
+	protected $data;
+	/** PCRE pattern matching a single marker; group 1 captures the marker key */
+	protected $regex;
+
+	/** Scratch values read by the preg_replace_callback() callbacks */
+	protected $tempType, $tempMergePrefix;
+
+	/**
+	 * @param $prefix String: text that every strip marker of this state starts with
+	 */
+	function __construct( $prefix ) {
+		$this->prefix = $prefix;
+		$this->data = array(
+			'nowiki' => array(),
+			'general' => array()
+		);
+		// Quote both literals so that a prefix or suffix containing PCRE
+		// metacharacters (or the "/" delimiter) can only match itself.
+		$this->regex = '/' . preg_quote( $prefix, '/' ) . "([^\x7f]+)" .
+			preg_quote( Parser::MARKER_SUFFIX, '/' ) . '/';
+	}
+
+	/**
+	 * Add a nowiki strip item
+	 *
+	 * @param $marker String: the complete strip marker
+	 * @param $value String: the text the marker stands for
+	 * @throws MWException if $marker does not match the marker pattern
+	 */
+	function addNoWiki( $marker, $value ) {
+		$this->addItem( 'nowiki', $marker, $value );
+	}
+
+	/**
+	 * Add a general strip item
+	 *
+	 * @param $marker String: the complete strip marker
+	 * @param $value String: the text the marker stands for
+	 * @throws MWException if $marker does not match the marker pattern
+	 */
+	function addGeneral( $marker, $value ) {
+		$this->addItem( 'general', $marker, $value );
+	}
+
+	/**
+	 * Store $value under the key extracted from $marker.
+	 *
+	 * @param $type String: 'nowiki' or 'general'
+	 * @param $marker String: the complete strip marker
+	 * @param $value String: the text the marker stands for
+	 * @throws MWException if $marker does not match the marker pattern
+	 */
+	protected function addItem( $type, $marker, $value ) {
+		if ( !preg_match( $this->regex, $marker, $m ) ) {
+			throw new MWException( "Invalid marker: $marker" );
+		}
+
+		$this->data[$type][$m[1]] = $value;
+	}
+
+	/**
+	 * Replace the markers of general items in $text with their values.
+	 *
+	 * @param $text String
+	 * @return String
+	 */
+	function unstripGeneral( $text ) {
+		return $this->unstripType( 'general', $text );
+	}
+
+	/**
+	 * Replace the markers of nowiki items in $text with their values.
+	 *
+	 * @param $text String
+	 * @return String
+	 */
+	function unstripNoWiki( $text ) {
+		return $this->unstripType( 'nowiki', $text );
+	}
+
+	/**
+	 * Replace general markers first, then nowiki markers.
+	 *
+	 * @param $text String
+	 * @return String
+	 */
+	function unstripBoth( $text ) {
+		$text = $this->unstripType( 'general', $text );
+		$text = $this->unstripType( 'nowiki', $text );
+		return $text;
+	}
+
+	/**
+	 * Replace every marker in $text that has a stored item of the given
+	 * type with that item's value. Other markers are left as they are.
+	 *
+	 * @param $type String: 'nowiki' or 'general'
+	 * @param $text String
+	 * @return String
+	 */
+	protected function unstripType( $type, $text ) {
+		// Shortcut
+		if ( !count( $this->data[$type] ) ) {
+			return $text;
+		}
+
+		wfProfileIn( __METHOD__ );
+		// The callback only receives the match array, so the type is handed
+		// over through a member variable for the duration of the call.
+		$this->tempType = $type;
+		$out = preg_replace_callback( $this->regex, array( $this, 'unstripCallback' ), $text );
+		$this->tempType = null;
+		wfProfileOut( __METHOD__ );
+		return $out;
+	}
+
+	/**
+	 * preg_replace_callback() callback for unstripType().
+	 *
+	 * @param $m Array: match array; $m[0] is the whole marker, $m[1] its key
+	 * @return String: the stored value, or the unchanged marker if none is stored
+	 */
+	protected function unstripCallback( $m ) {
+		if ( isset( $this->data[$this->tempType][$m[1]] ) ) {
+			return $this->data[$this->tempType][$m[1]];
+		} else {
+			return $m[0];
+		}
+	}
+
+	/**
+	 * Get a StripState object which is sufficient to unstrip the given text.
+	 * It will contain the minimum subset of strip items necessary.
+	 *
+	 * @param $text String
+	 * @return StripState
+	 */
+	function getSubState( $text ) {
+		$subState = new StripState( $this->prefix );
+		$prefixLen = strlen( $this->prefix );
+		$suffixLen = strlen( Parser::MARKER_SUFFIX );
+		$pos = 0;
+		while ( true ) {
+			$startPos = strpos( $text, $this->prefix, $pos );
+			if ( $startPos === false ) {
+				break;
+			}
+			// Look for the suffix only after the prefix that was just found;
+			// a stray suffix earlier in the text must not be paired with it.
+			$endPos = strpos( $text, Parser::MARKER_SUFFIX, $startPos + $prefixLen );
+			if ( $endPos === false ) {
+				break;
+			}
+
+			$endPos += $suffixLen;
+			$marker = substr( $text, $startPos, $endPos - $startPos );
+			if ( !preg_match( $this->regex, $marker, $m ) ) {
+				// Not a well-formed marker. Step past this prefix before
+				// retrying, otherwise the same span is found forever.
+				$pos = $startPos + $prefixLen;
+				continue;
+			}
+
+			$key = $m[1];
+			if ( isset( $this->data['nowiki'][$key] ) ) {
+				$subState->data['nowiki'][$key] = $this->data['nowiki'][$key];
+			} elseif ( isset( $this->data['general'][$key] ) ) {
+				$subState->data['general'][$key] = $this->data['general'][$key];
+			}
+			$pos = $endPos;
+		}
+		return $subState;
+	}
+
+	/**
+	 * Merge another StripState object into this one. The strip marker keys
+	 * will not be preserved. The strings in the $texts array will have their
+	 * strip markers rewritten, the resulting array of strings will be returned.
+	 *
+	 * @param $otherState StripState
+	 * @param $texts Array
+	 * @return Array
+	 */
+	function merge( $otherState, $texts ) {
+		$mergePrefix = Parser::getRandomString();
+
+		// Copy the other state's items under new keys of the form
+		// "<mergePrefix>-<old key>".
+		foreach ( $otherState->data as $type => $items ) {
+			foreach ( $items as $key => $value ) {
+				$this->data[$type]["$mergePrefix-$key"] = $value;
+			}
+		}
+
+		// Rewrite the other state's markers in $texts to the new keys.
+		$this->tempMergePrefix = $mergePrefix;
+		$texts = preg_replace_callback( $otherState->regex, array( $this, 'mergeCallback' ), $texts );
+		$this->tempMergePrefix = null;
+		return $texts;
+	}
+
+	/**
+	 * preg_replace_callback() callback for merge().
+	 *
+	 * @param $m Array: match array; $m[1] is the marker key in the other state
+	 * @return String: a marker using this state's prefix and the merged key
+	 */
+	protected function mergeCallback( $m ) {
+		$key = $m[1];
+		return "{$this->prefix}{$this->tempMergePrefix}-$key" . Parser::MARKER_SUFFIX;
+	}
+}
+