diff options
author | Platonides <platonides@users.mediawiki.org> | 2010-01-26 18:58:07 +0000 |
---|---|---|
committer | Platonides <platonides@users.mediawiki.org> | 2010-01-26 18:58:07 +0000 |
commit | 11f8b8390cec0b0331f431b8a11721436a6035f8 (patch) | |
tree | 0468471266f31713883125618d79880b57344d24 /includes/StringUtils.php | |
parent | 68f525870196403b870290776f15555582b61197 (diff) | |
download | mediawikicore-11f8b8390cec0b0331f431b8a11721436a6035f8.tar.gz mediawikicore-11f8b8390cec0b0331f431b8a11721436a6035f8.zip |
Step 4: Profit!!
Add and use PregSplitIterator instead of a direct preg_split.
Slower, but with an upper bound on memory usage.
Notes
Notes:
http://mediawiki.org/wiki/Special:Code/MediaWiki/61528
Diffstat (limited to 'includes/StringUtils.php')
-rw-r--r-- | includes/StringUtils.php | 87 |
1 files changed, 87 insertions, 0 deletions
// Excerpt of the patch to includes/StringUtils.php
// (diff --git a/includes/StringUtils.php b/includes/StringUtils.php,
// index c437b3c19e48..bab9be4bbe31 100644).
//
// Hunk 1 (@@ -179,6 +179,14 @@ class StringUtils) adds the following static
// method as the last member of class StringUtils, right after the method that
// ends in "return new ArrayIterator( explode( $separator, $subject ) );".
// It is reproduced as a comment because the enclosing class is not part of
// this excerpt:
//
//	/**
//	 * Workalike for preg_split() with limited memory usage.
//	 * Returns an Iterator
//	 */
//	static function preg_split( $pattern, $subject, $limit = -1, $flags = 0 ) {
//		return new PregSplitIterator( $pattern, $subject, $limit, $flags );
//	}
//
// Hunk 2 (@@ -409,3 +417,82 @@, following class ExplodeIterator) adds the
// PregSplitIterator class below.

/**
 * An iterator which works like:
 *
 *     foreach ( preg_split( $pattern, $s, $limit, $flags ) as $element ) {
 *         ...
 *     }
 *
 * except that the subject is split lazily, MAX_LIMIT pieces at a time, so the
 * number of array elements held at once has an upper bound even when $limit
 * is -1 (or 0, which preg_split() also treats as "no limit").
 *
 * How it works: each round calls preg_split() with a limit of MAX_LIMIT + 1.
 * When that limit is reached, the last returned element is the still-unsplit
 * remainder of the subject; it is kept aside and split again once the buffered
 * pieces have been consumed.
 *
 * Limitations:
 *  - The remainder is split as a stand-alone string. Patterns whose match
 *    depends on the text before it (anchors, look-behind, \b, ...) or that can
 *    match the empty string may therefore give a different result at a chunk
 *    boundary than a single preg_split() call would.
 *  - PREG_SPLIT_OFFSET_CAPTURE cannot be chunked (offsets would be relative to
 *    the remainder). When that flag is given the iterator falls back to one
 *    plain preg_split() call: the result is correct but not memory-bounded.
 *  - If preg_split() fails (returns false) the iteration simply ends.
 */
class PregSplitIterator implements Iterator {
	// Constructor arguments, kept so that rewind() can restart the iteration.
	var $pattern, $subject, $originalLimit, $flags;

	// The last extracted group of items.
	var $smallArray;

	// The position on the iterator (key of the current element, counted from 0).
	var $curPos;

	// Number of pieces that may still be produced, or -1 for "no limit".
	var $limit;

	// Index of the current element inside $smallArray.
	private $bufPos;

	// Part of the subject that has not been split yet; null once nothing is left.
	private $rest;

	const MAX_LIMIT = 100;

	/**
	 * Construct a PregSplitIterator
	 *
	 * @param $pattern String: regular expression, as for preg_split()
	 * @param $s String: the subject to split
	 * @param $limit Integer: maximum number of pieces; -1 or 0 means no limit
	 * @param $flags Integer: bitwise combination of PREG_SPLIT_* flags
	 */
	function __construct( $pattern, $s, $limit, $flags ) {
		$this->pattern = $pattern;
		$this->subject = $s;
		$this->originalLimit = $limit;
		$this->flags = $flags;

		$this->rewind();
	}

	/**
	 * Split the next group of pieces off $this->rest into $this->smallArray.
	 *
	 * Sets $this->rest to the new unsplit remainder, or to null when this was
	 * the last group.
	 */
	private function loadChunk() {
		$this->bufPos = 0;

		$chunked = !( $this->flags & PREG_SPLIT_OFFSET_CAPTURE )
			&& ( $this->limit == -1 || $this->limit > self::MAX_LIMIT );

		if ( !$chunked ) {
			// Final (or only) round: what is left of the limit fits in one
			// group, so preg_split() itself produces the correct tail,
			// including the unsplit last element of a limited split.
			$items = preg_split( $this->pattern, $this->rest, $this->limit, $this->flags );
			$this->rest = null;
			$this->smallArray = ( $items === false ) ? array() : $items;
			return;
		}

		$chunkLimit = self::MAX_LIMIT + 1;
		$items = preg_split( $this->pattern, $this->rest, $chunkLimit, $this->flags );
		if ( $items === false ) {
			$this->rest = null;
			$this->smallArray = array();
			return;
		}

		if ( $this->flags & PREG_SPLIT_DELIM_CAPTURE ) {
			// Captured delimiters are extra elements that do not count towards
			// the limit, so count( $items ) cannot tell whether the limit was
			// reached. Count the pieces alone with a second split; matching is
			// identical, only the delimiters are left out.
			$pieces = preg_split( $this->pattern, $this->rest, $chunkLimit,
				$this->flags & ~PREG_SPLIT_DELIM_CAPTURE );
			$limitReached = ( $pieces !== false && count( $pieces ) == $chunkLimit );
		} else {
			$limitReached = ( count( $items ) == $chunkLimit );
		}

		if ( $limitReached ) {
			// Last item contains the rest, not split yet.
			$this->rest = array_pop( $items );
			if ( $this->limit != -1 ) {
				$this->limit -= self::MAX_LIMIT;
			}
		} else {
			// The subject ran out before the limit did: nothing is left.
			$this->rest = null;
		}
		$this->smallArray = $items;
	}

	/**
	 * Restart the iteration from the beginning of the subject.
	 */
	// The attribute below is a plain comment before PHP 8; on PHP 8.1+ it
	// silences the "return type" deprecation for untyped Iterator methods.
	#[\ReturnTypeWillChange]
	function rewind() {
		$this->curPos = 0;
		$this->bufPos = 0;
		$this->smallArray = array();
		$this->rest = $this->subject;

		// preg_split() treats both -1 and 0 as "no limit".
		$limit = intval( $this->originalLimit );
		$this->limit = ( $limit == 0 ) ? -1 : $limit;

		$this->loadChunk();
	}

	/**
	 * @return The current piece, or null when the iterator is not valid
	 */
	#[\ReturnTypeWillChange]
	function current() {
		return $this->valid() ? $this->smallArray[$this->bufPos] : null;
	}

	/**
	 * @return Integer: zero-based position of the current piece
	 */
	#[\ReturnTypeWillChange]
	function key() {
		return $this->curPos;
	}

	/**
	 * Advance to the next piece, splitting another group when the buffered
	 * one has been used up.
	 */
	#[\ReturnTypeWillChange]
	function next() {
		$this->curPos++;
		$this->bufPos++;
		if ( $this->bufPos >= count( $this->smallArray ) && $this->rest !== null ) {
			$this->loadChunk();
		}
	}

	/**
	 * @return Boolean: whether current() refers to an existing piece
	 */
	#[\ReturnTypeWillChange]
	function valid() {
		return $this->bufPos < count( $this->smallArray );
	}
}