diff options
author | Tim Starling <tstarling@wikimedia.org> | 2022-11-21 09:23:34 +1100 |
---|---|---|
committer | Tim Starling <tstarling@wikimedia.org> | 2022-12-20 13:43:44 +1100 |
commit | 096ea232082ccd9f43ebd880f20221d3fcb0ca43 (patch) | |
tree | 7785567ec0033b8830aba9a1c3d6420ff46c2358 | |
parent | 850049bd5fc288b3e52ac22d056996d35b5149e5 (diff) | |
download | mediawikicore-096ea232082ccd9f43ebd880f20221d3fcb0ca43.tar.gz mediawikicore-096ea232082ccd9f43ebd880f20221d3fcb0ca43.zip |
Update moveToExternal and resolveStubs
Convert these two old scripts to Maintenance subclasses.
* Uncomment the resolveStub() call in moveToExternal and fix one obvious
bug with it: stubs can only be resolved after the CGZ blobs they point to
have been moved, so stub IDs are now collected and resolved at the end.
* Replace get_class() with instanceof.
* Make the "tiny text" threshold configurable (--threshold, default 0;
previously hard-coded to 100 bytes). Skipping small rows is normally not
wanted in WMF production, since new revisions are written to ES
unconditionally.
* Add a dry run mode.
* Add an undo log.
* Add --skip-resolve option.
* Make resolveStub() be much more defensive about what it resolves.
* In moveToExternal, make compression optional (--skip-gzip) and also
apply it to plain text rows.
* Optionally convert text from the legacy encoding to UTF-8 (--iconv).
Bug: T299387
Change-Id: I52d54e3b6b785ac072796031be06499221340f51
-rw-r--r-- | autoload.php | 3 | ||||
-rw-r--r-- | includes/Maintenance/UndoLog.php | 55 | ||||
-rw-r--r-- | includes/Storage/SqlBlobStore.php | 12 | ||||
-rw-r--r-- | maintenance/storage/moveToExternal.php | 306 | ||||
-rw-r--r-- | maintenance/storage/resolveStubs.php | 223 |
5 files changed, 436 insertions, 163 deletions
diff --git a/autoload.php b/autoload.php index 80460a3216f6..b0ecbecfafe8 100644 --- a/autoload.php +++ b/autoload.php @@ -1484,6 +1484,7 @@ $wgAutoloadLocalClasses = [ 'MediaWiki\\Maintenance\\MaintenanceParameters' => __DIR__ . '/maintenance/includes/MaintenanceParameters.php', 'MediaWiki\\Maintenance\\MaintenanceRunner' => __DIR__ . '/maintenance/includes/MaintenanceRunner.php', 'MediaWiki\\Maintenance\\OrderedStreamingForkController' => __DIR__ . '/includes/Maintenance/OrderedStreamingForkController.php', + 'MediaWiki\\Maintenance\\UndoLog' => __DIR__ . '/includes/Maintenance/UndoLog.php', 'MediaWiki\\MediaWikiServices' => __DIR__ . '/includes/MediaWikiServices.php', 'MediaWiki\\Message\\Converter' => __DIR__ . '/includes/Message/Converter.php', 'MediaWiki\\Message\\MessageFormatterFactory' => __DIR__ . '/includes/Message/MessageFormatterFactory.php', @@ -2039,6 +2040,7 @@ $wgAutoloadLocalClasses = [ 'MoveLogFormatter' => __DIR__ . '/includes/logging/MoveLogFormatter.php', 'MovePage' => __DIR__ . '/includes/MovePage.php', 'MovePageForm' => __DIR__ . '/includes/specials/SpecialMovepage.php', + 'MoveToExternal' => __DIR__ . '/maintenance/storage/moveToExternal.php', 'MultiConfig' => __DIR__ . '/includes/config/MultiConfig.php', 'MultiHttpClient' => __DIR__ . '/includes/libs/http/MultiHttpClient.php', 'MultiWriteBagOStuff' => __DIR__ . '/includes/libs/objectcache/MultiWriteBagOStuff.php', @@ -2239,6 +2241,7 @@ $wgAutoloadLocalClasses = [ 'ResetAuthenticationThrottle' => __DIR__ . '/maintenance/resetAuthenticationThrottle.php', 'ResetPageRandom' => __DIR__ . '/maintenance/resetPageRandom.php', 'ResetUserEmail' => __DIR__ . '/maintenance/resetUserEmail.php', + 'ResolveStubs' => __DIR__ . '/maintenance/storage/resolveStubs.php', 'ResourceFileCache' => __DIR__ . '/includes/cache/ResourceFileCache.php', 'ResourceLoader' => __DIR__ . '/includes/ResourceLoader/ResourceLoader.php', 'ResourceLoaderCircularDependencyError' => __DIR__ . 
'/includes/ResourceLoader/CircularDependencyError.php', diff --git a/includes/Maintenance/UndoLog.php b/includes/Maintenance/UndoLog.php new file mode 100644 index 000000000000..1cd7e3c11481 --- /dev/null +++ b/includes/Maintenance/UndoLog.php @@ -0,0 +1,55 @@ +<?php + +namespace MediaWiki\Maintenance; + +use Wikimedia\Rdbms\IDatabase; + +/** + * Update a database while optionally writing SQL that reverses the update to + * a file. + */ +class UndoLog { + private $file; + private $dbw; + + /** + * @param string|null $fileName + * @param IDatabase $dbw + */ + public function __construct( $fileName, IDatabase $dbw ) { + if ( $fileName !== null ) { + $this->file = fopen( $fileName, 'a' ); + if ( !$this->file ) { + throw new \RuntimeException( 'Unable to open undo log' ); + } + } + $this->dbw = $dbw; + } + + /** + * @param string $table + * @param array $newValues + * @param array $oldValues + * @param string $fname + * @return bool + */ + public function update( $table, array $newValues, array $oldValues, $fname ) { + $this->dbw->update( + $table, + $newValues, + $oldValues, + $fname + ); + $updated = (bool)$this->dbw->affectedRows(); + if ( $this->file && $updated ) { + $table = $this->dbw->tableName( $table ); + fwrite( + $this->file, + "UPDATE $table" . + ' SET ' . $this->dbw->makeList( $oldValues, IDatabase::LIST_SET ) . + ' WHERE ' . $this->dbw->makeList( $newValues, IDatabase::LIST_AND ) . 
";\n" + ); + } + return $updated; + } +} diff --git a/includes/Storage/SqlBlobStore.php b/includes/Storage/SqlBlobStore.php index 76c73985f590..bf611ddc9504 100644 --- a/includes/Storage/SqlBlobStore.php +++ b/includes/Storage/SqlBlobStore.php @@ -473,7 +473,7 @@ class SqlBlobStore implements IDBAccessObject, BlobStore { */ public function expandBlob( $raw, $flags, $blobAddress = null ) { if ( is_string( $flags ) ) { - $flags = explode( ',', $flags ); + $flags = self::explodeFlags( $flags ); } // Use external methods for external objects, text in table is URL-only then @@ -685,6 +685,16 @@ class SqlBlobStore implements IDBAccessObject, BlobStore { } /** + * Split a comma-separated old_flags value into its constituent parts + * + * @param string $flagsString + * @return array + */ + public static function explodeFlags( string $flagsString ) { + return $flagsString === '' ? [] : explode( ',', $flagsString ); + } + + /** * Splits a blob address into three parts: the schema, the ID, and parameters/flags. * * @since 1.33 diff --git a/maintenance/storage/moveToExternal.php b/maintenance/storage/moveToExternal.php index b3328352b529..cf729c3f13e5 100644 --- a/maintenance/storage/moveToExternal.php +++ b/maintenance/storage/moveToExternal.php @@ -1,6 +1,6 @@ <?php /** - * Move revision's text to external storage + * Move text from the text table to external storage * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,111 +21,257 @@ * @ingroup Maintenance ExternalStorage */ -// NO_AUTOLOAD -- file scope code - +use MediaWiki\MainConfigNames; +use MediaWiki\Maintenance\UndoLog; use MediaWiki\MediaWikiServices; +use MediaWiki\Storage\SqlBlobStore; +use Wikimedia\AtEase\AtEase; -define( 'REPORTING_INTERVAL', 1 ); +require_once __DIR__ . '/../Maintenance.php'; -if ( !defined( 'MEDIAWIKI' ) ) { - $optionsWithArgs = [ 'e', 's' ]; - require_once __DIR__ . 
'/../CommandLineInc.php'; - require_once 'resolveStubs.php'; +class MoveToExternal extends Maintenance { + /** @var ResolveStubs */ + private $resolveStubs; + /** @var int */ + private $reportingInterval; + /** @var int */ + private $minID; + /** @var int */ + private $maxID; + /** @var string */ + private $esType; + /** @var string */ + private $esLocation; + /** @var int */ + private $threshold; + /** @var bool */ + private $gzip; + /** @var bool */ + private $skipResolve; + /** @var string|null */ + private $legacyEncoding; + /** @var bool */ + private $dryRun; + /** @var UndoLog */ + private $undoLog; - $fname = 'moveToExternal'; + public function __construct() { + parent::__construct(); - if ( !isset( $args[1] ) ) { - print "Usage: php moveToExternal.php [-s <startid>] [-e <endid>] <type> <location>\n"; - exit; - } + $this->setBatchSize( 1000 ); - $type = $args[0]; // e.g. "DB" or "mwstore" - $location = $args[1]; // e.g. "cluster12" or "global-swift" - $dbw = wfGetDB( DB_PRIMARY ); + $this->addOption( 'start', 'start old_id', false, true, 's' ); + $this->addOption( 'end', 'end old_id', false, true, 'e' ); + $this->addOption( 'threshold', 'minimum size in bytes', false, true ); + $this->addOption( 'reporting-interval', + 'show a message after this many revisions', false, true ); + $this->addOption( 'undo', 'filename for undo SQL', false, true ); - $maxID = (int)( $options['e'] ?? $dbw->selectField( 'text', 'MAX(old_id)', '', $fname ) ); - $minID = (int)( $options['s'] ?? 
1 ); + $this->addOption( 'skip-gzip', 'Don\'t compress individual revisions' ); + $this->addOption( 'skip-resolve', + 'Don\'t replace HistoryBlobStub objects with direct external store pointers' ); + $this->addOption( 'iconv', 'Resolve legacy character encoding' ); + $this->addOption( 'dry-run', 'Don\'t modify any rows' ); - moveToExternal( $type, $location, $maxID, $minID ); -} - -function moveToExternal( $type, $location, $maxID, $minID = 1 ) { - $fname = 'moveToExternal'; - $dbw = wfGetDB( DB_PRIMARY ); - $dbr = wfGetDB( DB_REPLICA ); + $this->addArg( 'type', 'The external store type, e.g. "DB" or "mwstore"' ); + $this->addArg( 'location', 'e.g. "cluster12" or "global-swift"' ); + } - $count = $maxID - $minID + 1; - $blockSize = 1000; - $numBlocks = ceil( $count / $blockSize ); - print "Moving text rows from $minID to $maxID to external storage\n"; + public function execute() { + $this->resolveStubs = new ResolveStubs; + $this->esType = $this->getArg( 0 ); // e.g. "DB" or "mwstore" + $this->esLocation = $this->getArg( 1 ); // e.g. 
"cluster12" or "global-swift" + $dbw = $this->getDB( DB_PRIMARY ); - $esFactory = MediaWikiServices::getInstance()->getExternalStoreFactory(); - $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); - $extStore = $esFactory->getStore( $type ); - $numMoved = 0; + $maxID = $this->getOption( 'end' ); + if ( $maxID === null ) { + $maxID = $dbw->selectField( 'text', 'MAX(old_id)', '', __METHOD__ ); + } + $this->maxID = (int)$maxID; + $this->minID = (int)$this->getOption( 'start', 1 ); - for ( $block = 0; $block < $numBlocks; $block++ ) { - $blockStart = $block * $blockSize + $minID; - $blockEnd = $blockStart + $blockSize - 1; + $this->reportingInterval = $this->getOption( 'reporting-interval', 100 ); + $this->threshold = (int)$this->getOption( 'threshold', 0 ); - if ( !( $block % REPORTING_INTERVAL ) ) { - print "oldid=$blockStart, moved=$numMoved\n"; - $lbFactory->waitForReplication(); + if ( $this->getOption( 'skip-gzip' ) ) { + $this->gzip = false; + } elseif ( !function_exists( 'gzdeflate' ) ) { + $this->fatalError( "gzdeflate() not found. " . + "Please run with --skip-gzip if you don't want to compress revisions." ); + } else { + $this->gzip = true; } - $res = $dbr->select( 'text', [ 'old_id', 'old_flags', 'old_text' ], - [ - "old_id BETWEEN $blockStart AND $blockEnd", - 'old_flags NOT ' . 
$dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ), - ], $fname - ); - foreach ( $res as $row ) { - # Resolve stubs - $text = $row->old_text; - $id = $row->old_id; - if ( $row->old_flags === '' ) { - $flags = 'external'; + $this->skipResolve = $this->getOption( 'skip-resolve' ); + + if ( $this->getOption( 'iconv' ) ) { + $legacyEncoding = $this->getConfig()->get( MainConfigNames::LegacyEncoding ); + if ( $legacyEncoding ) { + $this->legacyEncoding = $legacyEncoding; } else { - $flags = "{$row->old_flags},external"; + $this->output( "iconv requested but the wiki has no legacy encoding\n" ); + } + } + $this->dryRun = $this->getOption( 'dry-run', false ); + + $undo = $this->getOption( 'undo' ); + try { + $this->undoLog = new UndoLog( $undo, $dbw ); + } catch ( RuntimeException $e ) { + $this->fatalError( "Unable to open undo log" ); + } + $this->resolveStubs->setUndoLog( $this->undoLog ); + + $this->doMoveToExternal(); + } + + private function doMoveToExternal() { + $dbr = $this->getDB( DB_REPLICA ); + + $count = $this->maxID - $this->minID + 1; + $blockSize = $this->getBatchSize(); + $numBlocks = ceil( $count / $blockSize ); + print "Moving text rows from {$this->minID} to {$this->maxID} to external storage\n"; + + $esFactory = MediaWikiServices::getInstance()->getExternalStoreFactory(); + $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + $extStore = $esFactory->getStore( $this->esType ); + $numMoved = 0; + $stubIDs = []; + + for ( $block = 0; $block < $numBlocks; $block++ ) { + $blockStart = $block * $blockSize + $this->minID; + $blockEnd = $blockStart + $blockSize - 1; + + if ( $this->reportingInterval && !( $block % $this->reportingInterval ) ) { + $this->output( "oldid=$blockStart, moved=$numMoved\n" ); + $lbFactory->waitForReplication(); } - if ( strpos( $flags, 'object' ) !== false ) { - $obj = unserialize( $text ); - $className = strtolower( get_class( $obj ) ); - if ( $className == 'historyblobstub' ) { - # 
resolveStub( $id, $row->old_text, $row->old_flags ); - # $numStubs++; + $res = $dbr->select( 'text', [ 'old_id', 'old_flags', 'old_text' ], + [ + "old_id BETWEEN $blockStart AND $blockEnd", + 'old_flags NOT ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ), + ], __METHOD__ + ); + foreach ( $res as $row ) { + $text = $row->old_text; + $id = $row->old_id; + $flags = SqlBlobStore::explodeFlags( $row->old_flags ); + + if ( in_array( 'error', $flags ) ) { + continue; + } elseif ( in_array( 'object', $flags ) ) { + $obj = unserialize( $text ); + if ( $obj instanceof HistoryBlobStub ) { + // Handle later, after CGZ resolution + if ( !$this->skipResolve ) { + $stubIDs[] = $id; + } + continue; + } elseif ( $obj instanceof HistoryBlobCurStub ) { + // Copy cur text to ES + [ $text, $flags ] = $this->compress( $obj->getText(), [ 'utf-8' ] ); + } elseif ( $obj instanceof ConcatenatedGzipHistoryBlob ) { + // Store as is + } else { + $className = get_class( $obj ); + print "Warning: old_id=$id unrecognised object class \"$className\"\n"; + continue; + } + } elseif ( strlen( $text ) < $this->threshold ) { + // Don't move small revisions continue; - } elseif ( $className == 'historyblobcurstub' ) { - $text = gzdeflate( $obj->getText() ); - $flags = 'utf-8,gzip,external'; - } elseif ( $className == 'concatenatedgziphistoryblob' ) { - // Do nothing } else { - print "Warning: unrecognised object class \"$className\"\n"; + [ $text, $flags ] = $this->resolveLegacyEncoding( $text, $flags ); + [ $text, $flags ] = $this->compress( $text, $flags ); + } + $flags[] = 'external'; + $flagsString = implode( ',', $flags ); + + if ( $this->dryRun ) { + $this->output( "Move $id => $flagsString " . + addcslashes( substr( $text, 0, 30 ), "\0..\x1f\x7f..\xff" ) . 
+ "\n" + ); continue; } - } else { - $className = false; - } - if ( strlen( $text ) < 100 && $className === false ) { - // Don't move tiny revisions - continue; + $url = $extStore->store( $this->esLocation, $text ); + if ( !$url ) { + $this->fatalError( "Error writing to external storage" ); + } + $moved = $this->undoLog->update( + 'text', + [ 'old_flags' => $flagsString, 'old_text' => $url ], + (array)$row, + __METHOD__ + ); + if ( $moved ) { + $numMoved++; + } else { + print "Update of old_id $id failed, affected zero rows\n"; + } } + } + + if ( count( $stubIDs ) ) { + $this->resolveStubs( $stubIDs ); + } + } + + private function compress( $text, $flags ) { + if ( $this->gzip && !in_array( 'gzip', $flags ) ) { + $flags[] = 'gzip'; + $text = gzdeflate( $text ); + } + return [ $text, $flags ]; + } + + private function resolveLegacyEncoding( $text, $flags ) { + if ( $this->legacyEncoding !== null + && !in_array( 'utf-8', $flags ) + ) { + AtEase::suppressWarnings(); + $text = iconv( $this->legacyEncoding, 'UTF-8//IGNORE', $text ); + AtEase::restoreWarnings(); + $flags[] = 'utf-8'; + } + return [ $text, $flags ]; + } - # print "Storing " . strlen( $text ) . " bytes to $url\n"; - # print "old_id=$id\n"; + private function resolveStubs( $stubIDs ) { + if ( $this->dryRun ) { + print "Note: resolving stubs in dry run mode is expected to fail, " . + "because the main blobs have not been moved to external storage.\n"; + } - $url = $extStore->store( $location, $text ); - if ( !$url ) { - print "Error writing to external storage\n"; - exit; + $dbr = $this->getDB( DB_REPLICA ); + $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + $this->output( "Resolving " . count( $stubIDs ) . 
" stubs\n" ); + $numResolved = 0; + $numTotal = 0; + foreach ( array_chunk( $stubIDs, $this->getBatchSize() ) as $stubBatch ) { + $res = $dbr->select( + 'text', + [ 'old_id', 'old_flags', 'old_text' ], + [ 'old_id' => $stubBatch ], + __METHOD__ + ); + foreach ( $res as $row ) { + $numResolved += $this->resolveStubs->resolveStub( $row, $this->dryRun ) ? 1 : 0; + $numTotal++; + if ( $this->reportingInterval + && $numTotal % $this->reportingInterval == 0 + ) { + $this->output( "$numTotal stubs processed\n" ); + $lbFactory->waitForReplication(); + } } - $dbw->update( 'text', - [ 'old_flags' => $flags, 'old_text' => $url ], - [ 'old_id' => $id ], $fname ); - $numMoved++; } + $this->output( "$numResolved of $numTotal stubs resolved\n" ); } } + +$maintClass = MoveToExternal::class; +require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/maintenance/storage/resolveStubs.php b/maintenance/storage/resolveStubs.php index 9722a003f3f8..7d663b577105 100644 --- a/maintenance/storage/resolveStubs.php +++ b/maintenance/storage/resolveStubs.php @@ -22,101 +22,160 @@ * @ingroup Maintenance ExternalStorage */ +use MediaWiki\Maintenance\UndoLog; use MediaWiki\MediaWikiServices; +use MediaWiki\Storage\SqlBlobStore; -if ( !defined( 'MEDIAWIKI' ) ) { - $optionsWithArgs = [ 'm' ]; +require_once __DIR__ . '/../Maintenance.php'; - require_once __DIR__ . 
'/../CommandLineInc.php'; +class ResolveStubs extends Maintenance { + /** @var UndoLog|null */ + private $undoLog; - resolveStubs(); -} + public function __construct() { + parent::__construct(); + $this->setBatchSize( 1000 ); + $this->addOption( 'dry-run', 'Don\'t update any rows' ); + $this->addOption( 'undo', 'Undo log location', false, true ); + } -/** - * Convert history stubs that point to an external row to direct - * external pointers - */ -function resolveStubs() { - $fname = 'resolveStubs'; - - $dbr = wfGetDB( DB_REPLICA ); - $maxID = $dbr->selectField( 'text', 'MAX(old_id)', '', $fname ); - $blockSize = 10000; - $numBlocks = intval( $maxID / $blockSize ) + 1; - $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); - - for ( $b = 0; $b < $numBlocks; $b++ ) { - $lbFactory->waitForReplication(); - - printf( "%5.2f%%\n", $b / $numBlocks * 100 ); - $start = intval( $maxID / $numBlocks ) * $b + 1; - $end = intval( $maxID / $numBlocks ) * ( $b + 1 ); - - $res = $dbr->select( 'text', [ 'old_id', 'old_text', 'old_flags' ], - "old_id>=$start AND old_id<=$end " . - "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' " . 
- 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', - $fname ); - foreach ( $res as $row ) { - resolveStub( $row->old_id, $row->old_text, $row->old_flags ); + /** + * Convert history stubs that point to an external row to direct + * external pointers + */ + public function execute() { + $dbw = $this->getDB( DB_PRIMARY ); + $dbr = $this->getDB( DB_REPLICA ); + $maxID = $dbr->selectField( 'text', 'MAX(old_id)', '', __METHOD__ ); + $blockSize = $this->getBatchSize(); + $dryRun = $this->getOption( 'dry-run' ); + $this->setUndoLog( new UndoLog( $this->getOption( 'undo' ), $dbw ) ); + + $numBlocks = intval( $maxID / $blockSize ) + 1; + $numResolved = 0; + $numTotal = 0; + $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + + for ( $b = 0; $b < $numBlocks; $b++ ) { + $lbFactory->waitForReplication(); + + $this->output( sprintf( "%5.2f%%\n", $b / $numBlocks * 100 ) ); + $start = $blockSize * $b + 1; + $end = $blockSize * ( $b + 1 ); + + $res = $dbr->select( 'text', [ 'old_id', 'old_text', 'old_flags' ], + "old_id>=$start AND old_id<=$end " . + "AND old_flags LIKE '%object%' AND old_flags NOT LIKE '%external%' " . + // LOWER() doesn't work on binary text, need to convert + 'AND LOWER(CONVERT(LEFT(old_text,22) USING latin1)) = \'o:15:"historyblobstub"\'', + __METHOD__ ); + foreach ( $res as $row ) { + $numResolved += $this->resolveStub( $row, $dryRun ) ? 1 : 0; + $numTotal++; + } } + $this->output( "100%\n" ); + $this->output( "$numResolved of $numTotal stubs resolved\n" ); } - print "100%\n"; -} -/** - * Resolve a history stub - * @param int $id - * @param string $stubText - * @param string $flags - */ -function resolveStub( $id, $stubText, $flags ) { - $fname = 'resolveStub'; + /** + * @param UndoLog $undoLog + */ + public function setUndoLog( UndoLog $undoLog ) { + $this->undoLog = $undoLog; + } - $stub = unserialize( $stubText ); - $flags = explode( ',', $flags ); + /** + * Resolve a history stub. 
+ * + * This is called by MoveToExternal + * + * @param stdClass $row The existing text row + * @param bool $dryRun + * @return bool + */ + public function resolveStub( $row, $dryRun ) { + $id = $row->old_id; + $stub = unserialize( $row->old_text ); + $flags = SqlBlobStore::explodeFlags( $row->old_flags ); + + $dbr = $this->getDB( DB_REPLICA ); + $dbw = $this->getDB( DB_PRIMARY ); + + if ( !( $stub instanceof HistoryBlobStub ) ) { + print "Error at old_id $id: found object of class " . get_class( $stub ) . + ", expecting HistoryBlobStub\n"; + return false; + } - $dbr = wfGetDB( DB_REPLICA ); - $dbw = wfGetDB( DB_PRIMARY ); + $mainId = $stub->getLocation(); + if ( !$mainId ) { + print "Error at old_id $id: falsey location\n"; + return false; + } - if ( strtolower( get_class( $stub ) ) !== 'historyblobstub' ) { - print "Error found object of class " . get_class( $stub ) . ", expecting historyblobstub\n"; + # Get the main text row + $mainTextRow = $dbr->selectRow( + 'text', + [ 'old_text', 'old_flags' ], + [ 'old_id' => $mainId ], + __METHOD__ + ); + + if ( !$mainTextRow ) { + print "Error at old_id $id: can't find main text row old_id $mainId\n"; + return false; + } - return; - } + $mainFlags = SqlBlobStore::explodeFlags( $mainTextRow->old_flags ); + $mainText = $mainTextRow->old_text; - # Get the (maybe) external row - $externalRow = $dbr->selectRow( - 'text', - [ 'old_text' ], - [ - 'old_id' => $stub->getLocation(), - 'old_flags' . 
$dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ) - ], - $fname - ); - - if ( !$externalRow ) { - # Object wasn't external - return; - } + if ( !in_array( 'external', $mainFlags ) ) { + print "Error at old_id $id: target $mainId is not external\n"; + return false; + } + if ( preg_match( '!^DB://([^/]*)/([^/]*)/[0-9a-f]{32}$!', $mainText ) ) { + print "Error at old_id $id: target $mainId is a CGZ pointer\n"; + return false; + } + if ( preg_match( '!^DB://([^/]*)/([^/]*)/[0-9]{1,6}$!', $mainText ) ) { + print "Error at old_id $id: target $mainId is a DHB pointer\n"; + return false; + } + if ( !preg_match( '!^DB://([^/]*)/([^/]*)$!', $mainText ) ) { + print "Error at old_id $id: target $mainId has unrecognised text\n"; + return false; + } - # Preserve the legacy encoding flag, but switch from object to external - if ( in_array( 'utf-8', $flags ) ) { - $newFlags = 'external,utf-8'; - } else { - $newFlags = 'external'; + # Preserve the legacy encoding flag, but switch from object to external + if ( in_array( 'utf-8', $flags ) ) { + $newFlags = 'utf-8,external'; + } else { + $newFlags = 'external'; + } + $newText = $mainText . '/' . $stub->getHash(); + + # Update the row + if ( $dryRun ) { + $this->output( "Resolve $id => $newFlags $newText\n" ); + } else { + $updated = $this->undoLog->update( + 'text', + [ + 'old_flags' => $newFlags, + 'old_text' => $newText + ], + (array)$row, + __METHOD__ + ); + if ( !$updated ) { + $this->output( "Updated of old_id $id failed to match\n" ); + return false; + } + } + return true; } - - # Update the row - # print "oldid=$id\n"; - $dbw->update( 'text', - [ /* SET */ - 'old_flags' => $newFlags, - 'old_text' => $externalRow->old_text . '/' . $stub->getHash() - ], - [ /* WHERE */ - 'old_id' => $id - ], $fname - ); } + +$maintClass = ResolveStubs::class; +require_once RUN_MAINTENANCE_IF_MAIN; |