addDescription( 'Script to migrate from image/oldimage tables to file/filerevision' );
		$this->setBatchSize( 200 );
		$this->addOption( 'start', 'Name of file to start with', false, true );
		$this->addOption( 'end', 'Name of file to end with', false, true );
		$this->addOption( 'sleep', 'Time to sleep between each batch (in seconds). Default: 0', false, true );
	}

	/**
	 * Walk the image table in img_name order, one batch at a time, and migrate
	 * every file found through handleFile().
	 *
	 * Options read here:
	 *  - start: only files with img_name >= this value are processed
	 *  - end: only files with img_name <= this value are processed
	 *  - sleep: seconds to sleep after each batch (0 disables sleeping)
	 *
	 * Aborts through fatalError() (exit code 12) when the batch size is not positive.
	 */
	public function execute() {
		$start = $this->getOption( 'start', false );
		$sleep = (int)$this->getOption( 'sleep', 0 );
		$dbw = $this->getPrimaryDB();

		$queryBuilderTemplate = $dbw->newSelectQueryBuilder()
			->select( [
				'img_name',
				'img_size',
				'img_width',
				'img_height',
				'img_metadata',
				'img_bits',
				'img_media_type',
				'img_major_mime',
				'img_minor_mime',
				'img_timestamp',
				'img_sha1',
				'img_actor',
				'img_description_id',
				'img_description_text' => 'comment_img_description.comment_text',
				'img_description_data' => 'comment_img_description.comment_data',
				'img_description_cid' => 'comment_img_description.comment_id'
			] )
			->from( 'image' )
			->join( 'comment', 'comment_img_description', 'comment_img_description.comment_id = img_description_id' );

		$totalRowsInserted = 0;
		$filesHandled = 0;

		$batchSize = intval( $this->getBatchSize() );
		if ( $batchSize <= 0 ) {
			$this->fatalError( "Batch size is too low...", 12 );
		}

		$end = $this->getOption( 'end', false );
		if ( $end !== false ) {
			$queryBuilderTemplate->andWhere( $dbw->expr( 'img_name', '<=', $end ) );
		}
		$queryBuilderTemplate
			->orderBy( 'img_name', SelectQueryBuilder::SORT_ASC )
			->limit( $batchSize );

		// Lower bound of the next batch: the inclusive --start value for the first
		// batch, then "img_name > last processed name" for every later one.
		$batchCondition = [];
		if ( $start !== false ) {
			$batchCondition[] = $dbw->expr( 'img_name', '>=', $start );
		}

		do {
			// The template is shared between batches, so only a clone gets the batch condition
			$queryBuilder = clone $queryBuilderTemplate;
			$res = $queryBuilder->andWhere( $batchCondition )
				->caller( __METHOD__ )->fetchResultSet();
			$numRows = $res->numRows();

			if ( $numRows > 0 ) {
				$row1 = $res->current();
				$this->output( "Processing next {$numRows} row(s) starting with {$row1->img_name}.\n" );
				$res->rewind();
			}

			// Stays null when the batch is empty, in which case the condition is left untouched
			$lastName = null;
			foreach ( $res as $row ) {
				$rowsInserted = $this->handleFile( $row );
				$filesHandled += 1;
				$totalRowsInserted += $rowsInserted;
				$this->output( "Migrated File:{$row->img_name}. Inserted $rowsInserted rows.\n" );
				$lastName = $row->img_name;
			}

			if ( $lastName !== null ) {
				$batchCondition = [ $dbw->expr( 'img_name', '>', $lastName ) ];
			}

			$this->waitForReplication();
			if ( $sleep ) {
				sleep( $sleep );
			}
			// A batch shorter than the limit means there is nothing left to fetch
		} while ( $numRows === $batchSize );

		$this->output( "\nFinished migration for $filesHandled files. "
			. "$totalRowsInserted rows have been inserted into filerevision table.\n" );
	}

	/**
	 * Migrate one file: make sure a filerevision row exists for each of its
	 * oldimage rows and for its current image row, then point file_latest at
	 * the filerevision row with the highest fr_timestamp.
	 *
	 * The file lock is held for the whole operation and the database work runs
	 * inside one atomic section.
	 *
	 * @param stdClass $row Row of the image table as selected by execute().
	 *  Its img_description_id property is removed by this method.
	 * @return int Number of rows inserted into filerevision
	 */
	private function handleFile( stdClass $row ): int {
		$repo = $this->getServiceContainer()->getRepoGroup()
			->newCustomLocalRepo();
		$dbw = $this->getPrimaryDB();
		$rowsInserted = 0;

		// LocalFile doesn't like it when the row holds img_description_id
		$imgDescriptionId = $row->img_description_id;
		unset( $row->img_description_id );

		$file = $repo->newFileFromRow( $row );

		// Lock everything we can
		$file->acquireFileLock();
		try {
			$dbw->startAtomic( __METHOD__ );

			// The results of the image and file selects are discarded on purpose:
			// they are only issued FOR UPDATE to lock the matching rows.
			$dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'image' )
				->where( [ 'img_name' => $row->img_name ] )
				->caller( __METHOD__ )->fetchRow();

			$oldimageRows = $dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'oldimage' )
				->where( [ 'oi_name' => $row->img_name ] )
				->orderBy( 'oi_timestamp', SelectQueryBuilder::SORT_ASC )
				->caller( __METHOD__ )->fetchResultSet();

			$dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'file' )
				->where( [ 'file_name' => $row->img_name ] )
				->caller( __METHOD__ )->fetchRow();

			// Make sure the row exists in file table
			$fileId = $file->acquireFileIdFromName();

			// NOTE(review): this is a snapshot taken before any insert below, so rows
			// inserted during this call are not seen by the "already migrated" checks.
			$fileRevisionRows = $dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'filerevision' )
				->where( [ 'fr_file' => $fileId ] )
				->caller( __METHOD__ )->fetchResultSet();

			// Make sure the filerevision rows exist
			foreach ( $oldimageRows as $oldimageRow ) {
				// This assumes the combination of oi_timestamp and oi_sha1
				// will be always unique which is not the case in production
				// but also all of them were duplicate old uploads and we are
				// willing to simply insert one row only. See T67264
				if ( $this->hasFileRevision(
					$fileRevisionRows,
					$oldimageRow->oi_timestamp,
					$oldimageRow->oi_sha1
				) ) {
					continue;
				}

				$dbw->newInsertQueryBuilder()
					->insertInto( 'filerevision' )
					->row( [
						'fr_file' => $fileId,
						'fr_size' => $oldimageRow->oi_size,
						'fr_width' => $oldimageRow->oi_width,
						'fr_height' => $oldimageRow->oi_height,
						'fr_metadata' => $oldimageRow->oi_metadata,
						'fr_bits' => $oldimageRow->oi_bits,
						'fr_description_id' => $oldimageRow->oi_description_id,
						'fr_actor' => $oldimageRow->oi_actor,
						'fr_timestamp' => $oldimageRow->oi_timestamp,
						'fr_sha1' => $oldimageRow->oi_sha1,
						'fr_archive_name' => $oldimageRow->oi_archive_name,
						'fr_deleted' => $oldimageRow->oi_deleted,
					] )
					->caller( __METHOD__ )->execute();
				$rowsInserted += 1;
			}

			// Make sure the image row (most current version) is there
			if ( !$this->hasFileRevision( $fileRevisionRows, $row->img_timestamp, $row->img_sha1 ) ) {
				$dbw->newInsertQueryBuilder()
					->insertInto( 'filerevision' )
					->row( [
						'fr_file' => $fileId,
						'fr_size' => $row->img_size,
						'fr_width' => $row->img_width,
						'fr_height' => $row->img_height,
						'fr_metadata' => $row->img_metadata,
						'fr_bits' => $row->img_bits,
						'fr_description_id' => $imgDescriptionId,
						'fr_actor' => $row->img_actor,
						'fr_timestamp' => $row->img_timestamp,
						'fr_sha1' => $row->img_sha1,
						'fr_archive_name' => '',
						'fr_deleted' => 0,
					] )
					->caller( __METHOD__ )->execute();
				$rowsInserted += 1;
			}

			// Make sure file has the latest filerevision
			$latestFrId = $dbw->newSelectQueryBuilder()
				->select( 'fr_id' )
				->from( 'filerevision' )
				->where( [ 'fr_file' => $fileId ] )
				->orderBy( 'fr_timestamp', SelectQueryBuilder::SORT_DESC )
				->caller( __METHOD__ )->fetchField();

			$dbw->newUpdateQueryBuilder()
				->update( 'file' )
				->set( [ 'file_latest' => $latestFrId ] )
				->where( [ 'file_id' => $fileId ] )
				->caller( __METHOD__ )->execute();

			$dbw->endAtomic( __METHOD__ );
		} finally {
			// Release the file lock even when a query above throws, so that a
			// failed file is not left locked.
			$file->releaseFileLock();
		}

		return $rowsInserted;
	}

	/**
	 * Tell whether one of the given filerevision rows has exactly this
	 * timestamp and sha1 (both compared with ===).
	 *
	 * @param iterable $fileRevisionRows Rows exposing fr_timestamp and fr_sha1
	 * @param mixed $timestamp Value to match against fr_timestamp
	 * @param mixed $sha1 Value to match against fr_sha1
	 * @return bool
	 */
	private function hasFileRevision( iterable $fileRevisionRows, $timestamp, $sha1 ): bool {
		foreach ( $fileRevisionRows as $fileRevisionRow ) {
			if (
				$timestamp === $fileRevisionRow->fr_timestamp &&
				$sha1 === $fileRevisionRow->fr_sha1
			) {
				return true;
			}
		}
		return false;
	}
}

// @codeCoverageIgnoreStart
$maintClass = MigrateFileTables::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd