aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Starling <tstarling@wikimedia.org>2022-11-21 09:23:34 +1100
committerTim Starling <tstarling@wikimedia.org>2022-12-20 13:43:44 +1100
commit096ea232082ccd9f43ebd880f20221d3fcb0ca43 (patch)
tree7785567ec0033b8830aba9a1c3d6420ff46c2358
parent850049bd5fc288b3e52ac22d056996d35b5149e5 (diff)
downloadmediawikicore-096ea232082ccd9f43ebd880f20221d3fcb0ca43.tar.gz
mediawikicore-096ea232082ccd9f43ebd880f20221d3fcb0ca43.zip
Update moveToExternal and resolveStubs
Convert these two old scripts to Maintenance subclasses. * Uncomment the resolveStub() call in moveToExternal and fix one obvious bug with it, i.e. the fact that stubs need to be resolved after CGZ blobs are moved. * Replace get_class() with instanceof. * Make the "tiny text" threshold configurable. Normally this is not wanted in WMF production since new revisions are written to ES unconditionally. * Add a dry run mode. * Add an undo log. * Add --skip-resolve option. * Make resolveStub() be much more defensive about what it resolves. * In moveToExternal, make compression optional and do it also for plain text. * Optionally convert the legacy encoding to UTF-8. Bug: T299387 Change-Id: I52d54e3b6b785ac072796031be06499221340f51
-rw-r--r--autoload.php3
-rw-r--r--includes/Maintenance/UndoLog.php55
-rw-r--r--includes/Storage/SqlBlobStore.php12
-rw-r--r--maintenance/storage/moveToExternal.php306
-rw-r--r--maintenance/storage/resolveStubs.php223
5 files changed, 436 insertions, 163 deletions
diff --git a/autoload.php b/autoload.php
index 80460a3216f6..b0ecbecfafe8 100644
--- a/autoload.php
+++ b/autoload.php
@@ -1484,6 +1484,7 @@ $wgAutoloadLocalClasses = [
'MediaWiki\\Maintenance\\MaintenanceParameters' => __DIR__ . '/maintenance/includes/MaintenanceParameters.php',
'MediaWiki\\Maintenance\\MaintenanceRunner' => __DIR__ . '/maintenance/includes/MaintenanceRunner.php',
'MediaWiki\\Maintenance\\OrderedStreamingForkController' => __DIR__ . '/includes/Maintenance/OrderedStreamingForkController.php',
+ 'MediaWiki\\Maintenance\\UndoLog' => __DIR__ . '/includes/Maintenance/UndoLog.php',
'MediaWiki\\MediaWikiServices' => __DIR__ . '/includes/MediaWikiServices.php',
'MediaWiki\\Message\\Converter' => __DIR__ . '/includes/Message/Converter.php',
'MediaWiki\\Message\\MessageFormatterFactory' => __DIR__ . '/includes/Message/MessageFormatterFactory.php',
@@ -2039,6 +2040,7 @@ $wgAutoloadLocalClasses = [
'MoveLogFormatter' => __DIR__ . '/includes/logging/MoveLogFormatter.php',
'MovePage' => __DIR__ . '/includes/MovePage.php',
'MovePageForm' => __DIR__ . '/includes/specials/SpecialMovepage.php',
+ 'MoveToExternal' => __DIR__ . '/maintenance/storage/moveToExternal.php',
'MultiConfig' => __DIR__ . '/includes/config/MultiConfig.php',
'MultiHttpClient' => __DIR__ . '/includes/libs/http/MultiHttpClient.php',
'MultiWriteBagOStuff' => __DIR__ . '/includes/libs/objectcache/MultiWriteBagOStuff.php',
@@ -2239,6 +2241,7 @@ $wgAutoloadLocalClasses = [
'ResetAuthenticationThrottle' => __DIR__ . '/maintenance/resetAuthenticationThrottle.php',
'ResetPageRandom' => __DIR__ . '/maintenance/resetPageRandom.php',
'ResetUserEmail' => __DIR__ . '/maintenance/resetUserEmail.php',
+ 'ResolveStubs' => __DIR__ . '/maintenance/storage/resolveStubs.php',
'ResourceFileCache' => __DIR__ . '/includes/cache/ResourceFileCache.php',
'ResourceLoader' => __DIR__ . '/includes/ResourceLoader/ResourceLoader.php',
'ResourceLoaderCircularDependencyError' => __DIR__ . '/includes/ResourceLoader/CircularDependencyError.php',
diff --git a/includes/Maintenance/UndoLog.php b/includes/Maintenance/UndoLog.php
new file mode 100644
index 000000000000..1cd7e3c11481
--- /dev/null
+++ b/includes/Maintenance/UndoLog.php
@@ -0,0 +1,55 @@
+<?php
+
+namespace MediaWiki\Maintenance;
+
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Update a database while optionally writing SQL that reverses the update to
+ * a file.
+ *
+ * Every update that affects at least one row appends one UPDATE statement to
+ * the undo log. That statement sets the columns given as conditions back to
+ * their old values, matching on the newly written values.
+ */
+class UndoLog {
+ /** @var resource|null Append-mode handle of the undo log; null when no file name was given */
+ private $file = null;
+ /** @var IDatabase Connection used both for the update and for building the undo SQL */
+ private $dbw;
+
+ /**
+ * @param string|null $fileName Undo log path, or null to update without logging
+ * @param IDatabase $dbw
+ * @throws \RuntimeException If the undo log cannot be opened for appending
+ */
+ public function __construct( $fileName, IDatabase $dbw ) {
+ if ( $fileName !== null ) {
+ $this->file = fopen( $fileName, 'a' );
+ if ( !$this->file ) {
+ throw new \RuntimeException( 'Unable to open undo log' );
+ }
+ }
+ $this->dbw = $dbw;
+ }
+
+ /**
+ * Close the undo log, if one was opened.
+ */
+ public function __destruct() {
+ if ( is_resource( $this->file ) ) {
+ fclose( $this->file );
+ }
+ }
+
+ /**
+ * Run an UPDATE and, if it changed anything, log the statement that reverts it.
+ *
+ * @param string $table
+ * @param array $newValues Values to set; they become the WHERE of the undo statement
+ * @param array $oldValues Conditions of the update; they become the SET of the undo statement
+ * @param string $fname
+ * @return bool Whether at least one row was affected
+ * @throws \RuntimeException If the undo statement could not be written completely
+ */
+ public function update( $table, array $newValues, array $oldValues, $fname ) {
+ $this->dbw->update(
+ $table,
+ $newValues,
+ $oldValues,
+ $fname
+ );
+ $updated = (bool)$this->dbw->affectedRows();
+ if ( $this->file && $updated ) {
+ $table = $this->dbw->tableName( $table );
+ $undoSql = "UPDATE $table" .
+ ' SET ' . $this->dbw->makeList( $oldValues, IDatabase::LIST_SET ) .
+ ' WHERE ' . $this->dbw->makeList( $newValues, IDatabase::LIST_AND ) . ";\n";
+ // The row has already been changed at this point, so an incomplete log
+ // entry must not pass silently: stop before more unlogged changes are made.
+ if ( fwrite( $this->file, $undoSql ) !== strlen( $undoSql ) ) {
+ throw new \RuntimeException( 'Unable to write to undo log' );
+ }
+ }
+ return $updated;
+ }
+}
diff --git a/includes/Storage/SqlBlobStore.php b/includes/Storage/SqlBlobStore.php
index 76c73985f590..bf611ddc9504 100644
--- a/includes/Storage/SqlBlobStore.php
+++ b/includes/Storage/SqlBlobStore.php
@@ -473,7 +473,7 @@ class SqlBlobStore implements IDBAccessObject, BlobStore {
*/
public function expandBlob( $raw, $flags, $blobAddress = null ) {
if ( is_string( $flags ) ) {
- $flags = explode( ',', $flags );
+ $flags = self::explodeFlags( $flags );
}
// Use external methods for external objects, text in table is URL-only then
@@ -685,6 +685,16 @@ class SqlBlobStore implements IDBAccessObject, BlobStore {
}
/**
+ * Split a comma-separated old_flags value into its constituent parts
+ *
+ * @param string $flagsString
+ * @return array
+ */
+ public static function explodeFlags( string $flagsString ) {
+ if ( $flagsString === '' ) {
+ // explode() on an empty string would give [ '' ], i.e. one bogus empty flag
+ return [];
+ }
+ return explode( ',', $flagsString );
+ }
+
+ /**
* Splits a blob address into three parts: the schema, the ID, and parameters/flags.
*
* @since 1.33
diff --git a/maintenance/storage/moveToExternal.php b/maintenance/storage/moveToExternal.php
index b3328352b529..cf729c3f13e5 100644
--- a/maintenance/storage/moveToExternal.php
+++ b/maintenance/storage/moveToExternal.php
@@ -1,6 +1,6 @@
<?php
/**
- * Move revision's text to external storage
+ * Move text from the text table to external storage
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,111 +21,257 @@
* @ingroup Maintenance ExternalStorage
*/
-// NO_AUTOLOAD -- file scope code
-
+use MediaWiki\MainConfigNames;
+use MediaWiki\Maintenance\UndoLog;
use MediaWiki\MediaWikiServices;
+use MediaWiki\Storage\SqlBlobStore;
+use Wikimedia\AtEase\AtEase;
-define( 'REPORTING_INTERVAL', 1 );
+require_once __DIR__ . '/../Maintenance.php';
-if ( !defined( 'MEDIAWIKI' ) ) {
- $optionsWithArgs = [ 'e', 's' ];
- require_once __DIR__ . '/../CommandLineInc.php';
- require_once 'resolveStubs.php';
+class MoveToExternal extends Maintenance {
+ /** @var ResolveStubs */
+ private $resolveStubs;
+ /** @var int */
+ private $reportingInterval;
+ /** @var int */
+ private $minID;
+ /** @var int */
+ private $maxID;
+ /** @var string */
+ private $esType;
+ /** @var string */
+ private $esLocation;
+ /** @var int */
+ private $threshold;
+ /** @var bool */
+ private $gzip;
+ /** @var bool */
+ private $skipResolve;
+ /** @var string|null */
+ private $legacyEncoding;
+ /** @var bool */
+ private $dryRun;
+ /** @var UndoLog */
+ private $undoLog;
- $fname = 'moveToExternal';
+ /**
+ * Declare the command line interface: optional old_id range, size threshold,
+ * reporting interval and undo log, the behaviour switches, and the two
+ * arguments naming the external store type and location.
+ */
+ public function __construct() {
+ parent::__construct();
- if ( !isset( $args[1] ) ) {
- print "Usage: php moveToExternal.php [-s <startid>] [-e <endid>] <type> <location>\n";
- exit;
- }
+ // The batch size is used as the old_id block size and the stub chunk size
+ $this->setBatchSize( 1000 );
- $type = $args[0]; // e.g. "DB" or "mwstore"
- $location = $args[1]; // e.g. "cluster12" or "global-swift"
- $dbw = wfGetDB( DB_PRIMARY );
+ $this->addOption( 'start', 'start old_id', false, true, 's' );
+ $this->addOption( 'end', 'end old_id', false, true, 'e' );
+ $this->addOption( 'threshold', 'minimum size in bytes', false, true );
+ $this->addOption( 'reporting-interval',
+ 'show a message after this many revisions', false, true );
+ $this->addOption( 'undo', 'filename for undo SQL', false, true );
- $maxID = (int)( $options['e'] ?? $dbw->selectField( 'text', 'MAX(old_id)', '', $fname ) );
- $minID = (int)( $options['s'] ?? 1 );
+ $this->addOption( 'skip-gzip', 'Don\'t compress individual revisions' );
+ $this->addOption( 'skip-resolve',
+ 'Don\'t replace HistoryBlobStub objects with direct external store pointers' );
+ $this->addOption( 'iconv', 'Resolve legacy character encoding' );
+ $this->addOption( 'dry-run', 'Don\'t modify any rows' );
- moveToExternal( $type, $location, $maxID, $minID );
-}
-
-function moveToExternal( $type, $location, $maxID, $minID = 1 ) {
- $fname = 'moveToExternal';
- $dbw = wfGetDB( DB_PRIMARY );
- $dbr = wfGetDB( DB_REPLICA );
+ $this->addArg( 'type', 'The external store type, e.g. "DB" or "mwstore"' );
+ $this->addArg( 'location', 'e.g. "cluster12" or "global-swift"' );
+ }
- $count = $maxID - $minID + 1;
- $blockSize = 1000;
- $numBlocks = ceil( $count / $blockSize );
- print "Moving text rows from $minID to $maxID to external storage\n";
+ /**
+ * Read the options and arguments into properties, open the undo log and
+ * start the move.
+ */
+ public function execute() {
+ $this->resolveStubs = new ResolveStubs;
+ $this->esType = $this->getArg( 0 ); // e.g. "DB" or "mwstore"
+ $this->esLocation = $this->getArg( 1 ); // e.g. "cluster12" or "global-swift"
+ $dbw = $this->getDB( DB_PRIMARY );
- $esFactory = MediaWikiServices::getInstance()->getExternalStoreFactory();
- $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
- $extStore = $esFactory->getStore( $type );
- $numMoved = 0;
+ $maxID = $this->getOption( 'end' );
+ if ( $maxID === null ) {
+ $maxID = $dbw->selectField( 'text', 'MAX(old_id)', '', __METHOD__ );
+ }
+ $this->maxID = (int)$maxID;
+ $this->minID = (int)$this->getOption( 'start', 1 );
- for ( $block = 0; $block < $numBlocks; $block++ ) {
- $blockStart = $block * $blockSize + $minID;
- $blockEnd = $blockStart + $blockSize - 1;
+ // Cast like the other numeric options: the interval is used as a modulo
+ // divisor, and a raw string such as "0.5" is truthy but divides as zero.
+ // After the cast, 0 (or anything non-numeric) simply disables reporting.
+ $this->reportingInterval = (int)$this->getOption( 'reporting-interval', 100 );
+ $this->threshold = (int)$this->getOption( 'threshold', 0 );
- if ( !( $block % REPORTING_INTERVAL ) ) {
- print "oldid=$blockStart, moved=$numMoved\n";
- $lbFactory->waitForReplication();
+ if ( $this->getOption( 'skip-gzip' ) ) {
+ $this->gzip = false;
+ } elseif ( !function_exists( 'gzdeflate' ) ) {
+ $this->fatalError( "gzdeflate() not found. " .
+ "Please run with --skip-gzip if you don't want to compress revisions." );
+ } else {
+ $this->gzip = true;
}
- $res = $dbr->select( 'text', [ 'old_id', 'old_flags', 'old_text' ],
- [
- "old_id BETWEEN $blockStart AND $blockEnd",
- 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
- ], $fname
- );
- foreach ( $res as $row ) {
- # Resolve stubs
- $text = $row->old_text;
- $id = $row->old_id;
- if ( $row->old_flags === '' ) {
- $flags = 'external';
+ $this->skipResolve = $this->getOption( 'skip-resolve' );
+
+ // legacyEncoding stays null unless --iconv is given and the wiki has one
+ if ( $this->getOption( 'iconv' ) ) {
+ $legacyEncoding = $this->getConfig()->get( MainConfigNames::LegacyEncoding );
+ if ( $legacyEncoding ) {
+ $this->legacyEncoding = $legacyEncoding;
} else {
- $flags = "{$row->old_flags},external";
+ $this->output( "iconv requested but the wiki has no legacy encoding\n" );
+ }
+ }
+ $this->dryRun = $this->getOption( 'dry-run', false );
+
+ $undo = $this->getOption( 'undo' );
+ try {
+ $this->undoLog = new UndoLog( $undo, $dbw );
+ } catch ( RuntimeException $e ) {
+ $this->fatalError( "Unable to open undo log" );
+ }
+ // Stub resolution writes through the same undo log as the move itself
+ $this->resolveStubs->setUndoLog( $this->undoLog );
+
+ $this->doMoveToExternal();
+ }
+
+ /**
+ * Walk the text table in old_id blocks from minID to maxID and move every
+ * row that is not yet flagged external to the external store.
+ *
+ * Rows flagged error are skipped. HistoryBlobStub rows are collected and
+ * resolved at the end, after the blobs they point to have been moved.
+ */
+ private function doMoveToExternal() {
+ $dbr = $this->getDB( DB_REPLICA );
+
+ $count = $this->maxID - $this->minID + 1;
+ $blockSize = $this->getBatchSize();
+ $numBlocks = ceil( $count / $blockSize );
+ print "Moving text rows from {$this->minID} to {$this->maxID} to external storage\n";
+
+ $esFactory = MediaWikiServices::getInstance()->getExternalStoreFactory();
+ $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+ $extStore = $esFactory->getStore( $this->esType );
+ $numMoved = 0;
+ $stubIDs = [];
+
+ for ( $block = 0; $block < $numBlocks; $block++ ) {
+ $blockStart = $block * $blockSize + $this->minID;
+ // Clamp the last block so that rows beyond the requested end are not moved
+ $blockEnd = min( $blockStart + $blockSize - 1, $this->maxID );
+
+ if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) {
+ $this->output( "oldid=$blockStart, moved=$numMoved\n" );
+ $lbFactory->waitForReplication();
}
- if ( strpos( $flags, 'object' ) !== false ) {
- $obj = unserialize( $text );
- $className = strtolower( get_class( $obj ) );
- if ( $className == 'historyblobstub' ) {
- # resolveStub( $id, $row->old_text, $row->old_flags );
- # $numStubs++;
+ $res = $dbr->select( 'text', [ 'old_id', 'old_flags', 'old_text' ],
+ [
+ "old_id BETWEEN $blockStart AND $blockEnd",
+ 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
+ ], __METHOD__
+ );
+ foreach ( $res as $row ) {
+ $text = $row->old_text;
+ $id = $row->old_id;
+ $flags = SqlBlobStore::explodeFlags( $row->old_flags );
+
+ if ( in_array( 'error', $flags ) ) {
+ continue;
+ } elseif ( in_array( 'object', $flags ) ) {
+ $obj = unserialize( $text );
+ if ( !is_object( $obj ) ) {
+ // unserialize() returns false on failure; get_class() below
+ // requires an object, so report and skip the row instead
+ print "Warning: old_id=$id did not unserialize to an object\n";
+ continue;
+ }
+ if ( $obj instanceof HistoryBlobStub ) {
+ // Handle later, after CGZ resolution
+ if ( !$this->skipResolve ) {
+ $stubIDs[] = $id;
+ }
+ continue;
+ } elseif ( $obj instanceof HistoryBlobCurStub ) {
+ // Copy cur text to ES
+ [ $text, $flags ] = $this->compress( $obj->getText(), [ 'utf-8' ] );
+ } elseif ( $obj instanceof ConcatenatedGzipHistoryBlob ) {
+ // Store as is
+ } else {
+ $className = get_class( $obj );
+ print "Warning: old_id=$id unrecognised object class \"$className\"\n";
+ continue;
+ }
+ } elseif ( strlen( $text ) < $this->threshold ) {
+ // Don't move small revisions
continue;
- } elseif ( $className == 'historyblobcurstub' ) {
- $text = gzdeflate( $obj->getText() );
- $flags = 'utf-8,gzip,external';
- } elseif ( $className == 'concatenatedgziphistoryblob' ) {
- // Do nothing
} else {
- print "Warning: unrecognised object class \"$className\"\n";
+ [ $text, $flags ] = $this->resolveLegacyEncoding( $text, $flags );
+ [ $text, $flags ] = $this->compress( $text, $flags );
+ }
+ $flags[] = 'external';
+ $flagsString = implode( ',', $flags );
+
+ if ( $this->dryRun ) {
+ $this->output( "Move $id => $flagsString " .
+ addcslashes( substr( $text, 0, 30 ), "\0..\x1f\x7f..\xff" ) .
+ "\n"
+ );
continue;
}
- } else {
- $className = false;
- }
- if ( strlen( $text ) < 100 && $className === false ) {
- // Don't move tiny revisions
- continue;
+ $url = $extStore->store( $this->esLocation, $text );
+ if ( !$url ) {
+ $this->fatalError( "Error writing to external storage" );
+ }
+ // The full original row is the update condition, so a row that changed
+ // since it was read is left alone and reported below
+ $moved = $this->undoLog->update(
+ 'text',
+ [ 'old_flags' => $flagsString, 'old_text' => $url ],
+ (array)$row,
+ __METHOD__
+ );
+ if ( $moved ) {
+ $numMoved++;
+ } else {
+ print "Update of old_id $id failed, affected zero rows\n";
+ }
}
+ }
+
+ if ( count( $stubIDs ) ) {
+ $this->resolveStubs( $stubIDs );
+ }
+ }
+
+ /**
+ * Deflate the text and flag it gzip, unless compression is switched off or
+ * the flags already contain gzip.
+ *
+ * @param string $text
+ * @param string[] $flags
+ * @return array [ text, flags ]
+ */
+ private function compress( $text, $flags ) {
+ if ( !$this->gzip || in_array( 'gzip', $flags ) ) {
+ // Nothing to do: pass both values through untouched
+ return [ $text, $flags ];
+ }
+ $deflated = gzdeflate( $text );
+ $flags[] = 'gzip';
+ return [ $deflated, $flags ];
+ }
+
+ /**
+ * Convert text from the legacy encoding to UTF-8 and flag it utf-8, if a
+ * legacy encoding is set and the row is not already flagged utf-8.
+ *
+ * Text flagged gzip is inflated before conversion and deflated again
+ * afterwards, because running iconv over compressed bytes would corrupt
+ * them. If inflation or conversion fails, the text and flags are returned
+ * unchanged rather than storing a false or empty value as the revision text.
+ *
+ * @param string $text
+ * @param string[] $flags
+ * @return array [ text, flags ]
+ */
+ private function resolveLegacyEncoding( $text, $flags ) {
+ if ( $this->legacyEncoding === null || in_array( 'utf-8', $flags ) ) {
+ return [ $text, $flags ];
+ }
+ $isCompressed = in_array( 'gzip', $flags );
+ if ( $isCompressed && !function_exists( 'gzinflate' ) ) {
+ // Cannot look inside the blob, so leave it exactly as it is
+ return [ $text, $flags ];
+ }
+
+ AtEase::suppressWarnings();
+ $plain = $isCompressed ? gzinflate( $text ) : $text;
+ $converted = $plain === false
+ ? false
+ : iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $plain );
+ AtEase::restoreWarnings();
+
+ if ( $converted === false ) {
+ // Do not claim the row is UTF-8 when nothing was converted
+ $this->output( "Warning: legacy encoding conversion failed, text left unconverted\n" );
+ return [ $text, $flags ];
+ }
+ if ( $isCompressed ) {
+ // Restore the compression state that the gzip flag promises
+ $converted = gzdeflate( $converted );
+ }
+ $flags[] = 'utf-8';
+ return [ $converted, $flags ];
+ }
- # print "Storing " . strlen( $text ) . " bytes to $url\n";
- # print "old_id=$id\n";
+ /**
+ * Resolve the HistoryBlobStub rows collected during the move, in chunks of
+ * the batch size, and report how many were resolved.
+ *
+ * @param int[] $stubIDs old_id values of the stub rows
+ */
+ private function resolveStubs( $stubIDs ) {
+ if ( $this->dryRun ) {
+ print "Note: resolving stubs in dry run mode is expected to fail, " .
+ "because the main blobs have not been moved to external storage.\n";
+ }
- $url = $extStore->store( $location, $text );
- if ( !$url ) {
- print "Error writing to external storage\n";
- exit;
+ $dbr = $this->getDB( DB_REPLICA );
+ $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+ // The stubs' target rows were just rewritten on the primary, but they are
+ // read back from the replica. The move loop only waits for replication at
+ // reporting intervals, so wait here before judging whether targets are external.
+ $lbFactory->waitForReplication();
+ $this->output( "Resolving " . count( $stubIDs ) . " stubs\n" );
+ $numResolved = 0;
+ $numTotal = 0;
+ foreach ( array_chunk( $stubIDs, $this->getBatchSize() ) as $stubBatch ) {
+ $res = $dbr->select(
+ 'text',
+ [ 'old_id', 'old_flags', 'old_text' ],
+ [ 'old_id' => $stubBatch ],
+ __METHOD__
+ );
+ foreach ( $res as $row ) {
+ $numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0;
+ $numTotal++;
+ if ( $this->reportingInterval
+ && $numTotal % $this->reportingInterval == 0
+ ) {
+ $this->output( "$numTotal stubs processed\n" );
+ $lbFactory->waitForReplication();
+ }
}
- $dbw->update( 'text',
- [ 'old_flags' => $flags, 'old_text' => $url ],
- [ 'old_id' => $id ], $fname );
- $numMoved++;
}
+ $this->output( "$numResolved of $numTotal stubs resolved\n" );
+ }
}
+
+$maintClass = MoveToExternal::class;
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/maintenance/storage/resolveStubs.php b/maintenance/storage/resolveStubs.php
index 9722a003f3f8..7d663b577105 100644
--- a/maintenance/storage/resolveStubs.php
+++ b/maintenance/storage/resolveStubs.php
@@ -22,101 +22,160 @@
* @ingroup Maintenance ExternalStorage
*/
+use MediaWiki\Maintenance\UndoLog;
use MediaWiki\MediaWikiServices;
+use MediaWiki\Storage\SqlBlobStore;
-if ( !defined( 'MEDIAWIKI' ) ) {
- $optionsWithArgs = [ 'm' ];
+require_once __DIR__ . '/../Maintenance.php';
- require_once __DIR__ . '/../CommandLineInc.php';
+class ResolveStubs extends Maintenance {
+ /** @var UndoLog|null */
+ private $undoLog;
- resolveStubs();
-}
+ /**
+ * Declare the options: a dry run switch and the location of the undo log.
+ */
+ public function __construct() {
+ parent::__construct();
+ // The batch size is the width of the old_id range scanned per query in execute()
+ $this->setBatchSize( 1000 );
+ $this->addOption( 'dry-run', 'Don\'t update any rows' );
+ $this->addOption( 'undo', 'Undo log location', false, true );
+ }
-/**
- * Convert history stubs that point to an external row to direct
- * external pointers
- */
-function resolveStubs() {
- $fname = 'resolveStubs';
-
- $dbr = wfGetDB( DB_REPLICA );
- $maxID = $dbr->selectField( 'text', 'MAX(old_id)', '', $fname );
- $blockSize = 10000;
- $numBlocks = intval( $maxID / $blockSize ) + 1;
- $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
-
- for ( $b = 0; $b < $numBlocks; $b++ ) {
- $lbFactory->waitForReplication();
-
- printf( "%5.2f%%\n", $b / $numBlocks * 100 );
- $start = intval( $maxID / $numBlocks ) * $b + 1;
- $end = intval( $maxID / $numBlocks ) * ( $b + 1 );
-
- $res = $dbr->select( 'text', [ 'old_id', 'old_text', 'old_flags' ],
- "old_id>=$start AND old_id<=$end " .
- "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' " .
- 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
- $fname );
- foreach ( $res as $row ) {
- resolveStub( $row->old_id, $row->old_text, $row->old_flags );
+ /**
+ * Convert history stubs that point to an external row to direct
+ * external pointers
+ *
+ * Scans the text table in old_id blocks of the batch size, printing the
+ * progress as a percentage and finally the number of stubs resolved.
+ */
+ public function execute() {
+ $dbw = $this->getDB( DB_PRIMARY );
+ $dbr = $this->getDB( DB_REPLICA );
+ $maxID = $dbr->selectField( 'text', 'MAX(old_id)', '', __METHOD__ );
+ $blockSize = $this->getBatchSize();
+ $dryRun = $this->getOption( 'dry-run' );
+ // UndoLog throws if the file cannot be opened; report that as a fatal
+ // error, as MoveToExternal does, instead of an uncaught exception
+ try {
+ $this->setUndoLog( new UndoLog( $this->getOption( 'undo' ), $dbw ) );
+ } catch ( RuntimeException $e ) {
+ $this->fatalError( "Unable to open undo log" );
+ }
+
+ $numBlocks = intval( $maxID / $blockSize ) + 1;
+ $numResolved = 0;
+ $numTotal = 0;
+ $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+
+ for ( $b = 0; $b < $numBlocks; $b++ ) {
+ $lbFactory->waitForReplication();
+
+ $this->output( sprintf( "%5.2f%%\n", $b / $numBlocks * 100 ) );
+ $start = $blockSize * $b + 1;
+ $end = $blockSize * ( $b + 1 );
+
+ $res = $dbr->select( 'text', [ 'old_id', 'old_text', 'old_flags' ],
+ "old_id>=$start AND old_id<=$end " .
+ "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' " .
+ // LOWER() doesn't work on binary text, need to convert
+ 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'',
+ __METHOD__ );
+ foreach ( $res as $row ) {
+ $numResolved += $this->resolveStub( $row, $dryRun ) ? 1 : 0;
+ $numTotal++;
+ }
}
+ $this->output( "100%\n" );
+ $this->output( "$numResolved of $numTotal stubs resolved\n" );
+ }
- print "100%\n";
-}
-/**
- * Resolve a history stub
- * @param int $id
- * @param string $stubText
- * @param string $flags
- */
-function resolveStub( $id, $stubText, $flags ) {
- $fname = 'resolveStub';
+ /**
+ * Set the undo log through which resolveStub() performs its row updates.
+ *
+ * @param UndoLog $undoLog
+ */
+ public function setUndoLog( UndoLog $undoLog ) {
+ $this->undoLog = $undoLog;
+ }
- $stub = unserialize( $stubText );
- $flags = explode( ',', $flags );
+ /**
+ * Resolve a history stub.
+ *
+ * This is called by MoveToExternal
+ *
+ * The stub row is rewritten to point directly at the external URL of its
+ * target row plus the stub's hash. Nothing is changed, and false is
+ * returned, unless the row holds a HistoryBlobStub whose target row exists,
+ * is flagged external and contains a plain DB://cluster/id URL.
+ *
+ * @param stdClass $row The existing text row
+ * @param bool $dryRun If true, only print what would be updated
+ * @return bool Whether the stub was resolved (or would be, in a dry run)
+ */
+ public function resolveStub( $row, $dryRun ) {
+ $id = $row->old_id;
+ $stub = unserialize( $row->old_text );
+ $flags = SqlBlobStore::explodeFlags( $row->old_flags );
+
+ $dbr = $this->getDB( DB_REPLICA );
+
+ if ( !( $stub instanceof HistoryBlobStub ) ) {
+ // unserialize() returns false on failure, and get_class() needs an object
+ $found = is_object( $stub ) ? 'object of class ' . get_class( $stub ) : gettype( $stub );
+ print "Error at old_id $id: found $found, expecting HistoryBlobStub\n";
+ return false;
+ }
- $dbr = wfGetDB( DB_REPLICA );
- $dbw = wfGetDB( DB_PRIMARY );
+ $mainId = $stub->getLocation();
+ if ( !$mainId ) {
+ print "Error at old_id $id: falsey location\n";
+ return false;
+ }
- if ( strtolower( get_class( $stub ) ) !== 'historyblobstub' ) {
- print "Error found object of class " . get_class( $stub ) . ", expecting historyblobstub\n";
+ # Get the main text row
+ $mainTextRow = $dbr->selectRow(
+ 'text',
+ [ 'old_text', 'old_flags' ],
+ [ 'old_id' => $mainId ],
+ __METHOD__
+ );
+
+ if ( !$mainTextRow ) {
+ print "Error at old_id $id: can't find main text row old_id $mainId\n";
+ return false;
+ }
- return;
- }
+ $mainFlags = SqlBlobStore::explodeFlags( $mainTextRow->old_flags );
+ $mainText = $mainTextRow->old_text;
- # Get the (maybe) external row
- $externalRow = $dbr->selectRow(
- 'text',
- [ 'old_text' ],
- [
- 'old_id' => $stub->getLocation(),
- 'old_flags' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() )
- ],
- $fname
- );
-
- if ( !$externalRow ) {
- # Object wasn't external
- return;
- }
+ if ( !in_array( 'external', $mainFlags ) ) {
+ print "Error at old_id $id: target $mainId is not external\n";
+ return false;
+ }
+ # Only a bare DB://cluster/id URL can take the stub's hash as a suffix
+ if ( preg_match( '!^DB://([^/]*)/([^/]*)/[0-9a-f]{32}$!', $mainText ) ) {
+ print "Error at old_id $id: target $mainId is a CGZ pointer\n";
+ return false;
+ }
+ if ( preg_match( '!^DB://([^/]*)/([^/]*)/[0-9]{1,6}$!', $mainText ) ) {
+ print "Error at old_id $id: target $mainId is a DHB pointer\n";
+ return false;
+ }
+ if ( !preg_match( '!^DB://([^/]*)/([^/]*)$!', $mainText ) ) {
+ print "Error at old_id $id: target $mainId has unrecognised text\n";
+ return false;
+ }
- # Preserve the legacy encoding flag, but switch from object to external
- if ( in_array( 'utf-8', $flags ) ) {
- $newFlags = 'external,utf-8';
- } else {
- $newFlags = 'external';
+ # Preserve the legacy encoding flag, but switch from object to external
+ if ( in_array( 'utf-8', $flags ) ) {
+ $newFlags = 'utf-8,external';
+ } else {
+ $newFlags = 'external';
+ }
+ $newText = $mainText . '/' . $stub->getHash();
+
+ # Update the row
+ if ( $dryRun ) {
+ $this->output( "Resolve $id => $newFlags $newText\n" );
+ } else {
+ # The full original row is the condition, so a concurrently changed row is not touched
+ $updated = $this->undoLog->update(
+ 'text',
+ [
+ 'old_flags' => $newFlags,
+ 'old_text' => $newText
+ ],
+ (array)$row,
+ __METHOD__
+ );
+ if ( !$updated ) {
+ $this->output( "Update of old_id $id failed to match\n" );
+ return false;
+ }
+ }
+ return true;
}
-
- # Update the row
- # print "oldid=$id\n";
- $dbw->update( 'text',
- [ /* SET */
- 'old_flags' => $newFlags,
- 'old_text' => $externalRow->old_text . '/' . $stub->getHash()
- ],
- [ /* WHERE */
- 'old_id' => $id
- ], $fname
- );
}
+
+$maintClass = ResolveStubs::class;
+require_once RUN_MAINTENANCE_IF_MAIN;