addDescription( 'Populates normalization column in links tables.' );
		$this->addOption( 'table', 'Table name. Like pagelinks.', true, true );
		$this->addOption(
			'sleep',
			'Sleep time (in seconds) between every batch. Default: 0',
			false,
			true
		);
		// doDBUpdates() walks the page ID space in steps of this size.
		$this->setBatchSize( 1000 );
	}

	/**
	 * Key identifying this update.
	 *
	 * The value of --table is appended to the class name, so every links table
	 * gets a distinct key.
	 *
	 * @return string
	 */
	protected function getUpdateKey() {
		return __CLASS__ . $this->getOption( 'table', '' );
	}

	/**
	 * Backfill the normalized link target column of the table given by --table.
	 *
	 * After checking the prerequisites, the page ID space (0 up to the highest
	 * page_id in the page table) is split into ranges of getBatchSize() IDs and
	 * each range is handed to handlePageBatch().
	 *
	 * @return bool True when the backfill finished or there is nothing to do;
	 *  false when a prerequisite is missing and the script has to be run again.
	 */
	protected function doDBUpdates() {
		$dbw = $this->getDB( DB_PRIMARY );
		$mapping = \MediaWiki\Linker\LinksMigration::$mapping;
		$table = $this->getOption( 'table', '' );
		if ( !isset( $mapping[$table] ) ) {
			$this->output( "Mapping for this table doesn't exist yet.\n" );
			return false;
		}

		$targetColumn = $mapping[$table]['target_id'];
		if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
			$this->output( "Old fields don't exist. There is no need to run this script\n" );
			return true;
		}
		if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
			$this->output( "Run update.php to create the $targetColumn column.\n" );
			return false;
		}
		if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
			$this->output( "Run update.php to create the linktarget table.\n" );
			// Nothing was migrated, so report failure exactly like the missing-column
			// case above. Returning true here would claim success for work that
			// never happened.
			// NOTE(review): presumably the base class records a true return as
			// "update completed" - confirm against LoggedUpdateMaintenance.
			return false;
		}

		$this->output( "Populating the $targetColumn column\n" );

		$highestPageRow = $dbw->newSelectQueryBuilder()
			->select( 'page_id' )
			->from( 'page' )
			->orderBy( 'page_id', 'DESC' )
			->limit( 1 )
			->caller( __METHOD__ )
			->fetchResultSet()->fetchRow();
		if ( !$highestPageRow ) {
			$this->output( "Page table is empty.\n" );
			return true;
		}
		// Cast so the loop condition below compares two integers.
		$highestPageId = (int)$highestPageRow[0];

		$updated = 0;
		$pageId = 0;
		while ( $pageId <= $highestPageId ) {
			// Given the indexes and the structure of links tables,
			// we need to split the update into batches of pages.
			// Otherwise the queries will take a really long time in production
			// and cause read-only.
			$updated += $this->handlePageBatch( $pageId, $mapping, $table );
			$pageId += $this->getBatchSize();
		}

		$this->output( "Completed normalization of $table, $updated rows updated.\n" );
		return true;
	}

	/**
	 * Backfill the target column for the rows of one page ID range.
	 *
	 * Repeatedly picks one (namespace, title) pair in the range whose target
	 * column is still NULL or 0, acquires the link target ID for it and writes
	 * that ID to every such row with the same namespace and title in the range.
	 * Stops once the range has no such rows left.
	 *
	 * @param int $lowPageId First page ID of the range (inclusive); the range
	 *  covers getBatchSize() consecutive IDs
	 * @param array $mapping Column mapping keyed by table name; the 'target_id',
	 *  'page_id', 'ns' and 'title' entries of $table are used
	 * @param string $table Name of the links table being migrated
	 * @return int Number of rows updated in this range
	 */
	private function handlePageBatch( $lowPageId, $mapping, $table ) {
		$batchSize = $this->getBatchSize();
		$targetColumn = $mapping[$table]['target_id'];
		$pageIdColumn = $mapping[$table]['page_id'];
		$nsColumn = $mapping[$table]['ns'];
		$titleColumn = $mapping[$table]['title'];
		// range is inclusive, let's subtract one.
		$highPageId = $lowPageId + $batchSize - 1;

		$dbw = $this->getPrimaryDB();
		// Neither of these changes between iterations, so resolve them once.
		$linkTargetLookup = $this->getServiceContainer()->getLinkTargetLookup();
		$sleep = (int)$this->getOption( 'sleep', 0 );

		$updated = 0;
		while ( true ) {
			// Pick any single row in the range that has not been migrated yet.
			$res = $dbw->newSelectQueryBuilder()
				->select( [ $nsColumn, $titleColumn ] )
				->from( $table )
				->where( [
					$targetColumn => [ null, 0 ],
					$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
					$dbw->expr( $pageIdColumn, '<=', $highPageId ),
				] )
				->limit( 1 )
				->caller( __METHOD__ )
				->fetchResultSet();
			if ( !$res->numRows() ) {
				break;
			}

			$row = $res->fetchRow();
			$ns = $row[$nsColumn];
			$titleString = $row[$titleColumn];
			$title = new TitleValue( (int)$ns, $titleString );
			$this->output( "Starting backfill of $ns:$titleString " .
				"title on pages between $lowPageId and $highPageId\n" );

			$id = $linkTargetLookup->acquireLinkTargetId( $title, $dbw );
			// Migrate every pending row for this title within the range in one query.
			$dbw->newUpdateQueryBuilder()
				->update( $table )
				->set( [ $targetColumn => $id ] )
				->where( [
					$targetColumn => [ null, 0 ],
					$nsColumn => $ns,
					$titleColumn => $titleString,
					$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
					$dbw->expr( $pageIdColumn, '<=', $highPageId ),
				] )
				->caller( __METHOD__ )->execute();
			$updatedInThisBatch = $dbw->affectedRows();
			$updated += $updatedInThisBatch;
			$this->output( "Updated $updatedInThisBatch rows\n" );

			// Sleep between batches for replication to catch up
			$this->waitForReplication();
			if ( $sleep > 0 ) {
				sleep( $sleep );
			}
		}

		return $updated;
	}
}

// @codeCoverageIgnoreStart
$maintClass = MigrateLinksTable::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd