addDescription( 'Find and fix invalid actor IDs.' );
		$this->addOption( 'field', 'The name of a database field to process',
			true, true );
		$this->addOption( 'type', 'Which type of invalid actors to find or fix, '
			. 'missing or broken (with empty actor_name which can\'t be associated '
			. 'with an existing user).',
			false, true );
		$this->addOption( 'skip', 'A comma-separated list of actor IDs to skip.',
			false, true );
		$this->addOption( 'overwrite-with', 'Replace invalid actors with this user. '
			. 'Typically, this would be "Unknown user", but it could be any reserved '
			. 'system user (per $wgReservedUsernames) or locally registered user. '
			. 'If not given, invalid actors will only be listed, not fixed. '
			. 'You will be prompted for confirmation before data is written. ',
			false, true );

		$this->setBatchSize( 1000 );
	}

	/**
	 * Returns the actor fields this script knows how to process.
	 *
	 * Each key is a value accepted by the --field option. Each value is a
	 * three-element array: [ table name, actor field, row identifier field ].
	 *
	 * @return array
	 */
	private function getTables(): array {
		return [
			'ar_actor' => [ 'archive', 'ar_actor', 'ar_id' ],
			'img_actor' => [ 'image', 'img_actor', 'img_name' ],
			'oi_actor' => [ 'oldimage', 'oi_actor', 'oi_archive_name' ], // no index on oi_archive_name!
			'fa_actor' => [ 'filearchive', 'fa_actor', 'fa_id' ],
			'rc_actor' => [ 'recentchanges', 'rc_actor', 'rc_id' ],
			'log_actor' => [ 'logging', 'log_actor', 'log_id' ],
			'rev_actor' => [ 'revision', 'rev_actor', 'rev_id' ],
			'bl_by_actor' => [ 'block', 'bl_by_actor', 'bl_id' ], // no index on bl_by_actor!
		];
	}

	/**
	 * Looks up the table information for a field name, as defined by getTables().
	 *
	 * @param string $field
	 * @return array|null [ table name, actor field, row identifier field ],
	 *   or null if the field is not known.
	 */
	private function getTableInfo( $field ): ?array {
		$tables = $this->getTables();
		return $tables[$field] ?? null;
	}

	/**
	 * Returns the actor ID of the user specified with the --overwrite-with option,
	 * or null if --overwrite-with is not set.
	 *
	 * Existing users and reserved system users are supported.
	 * If the user does not have an actor ID yet, one will be assigned.
	 *
	 * @return int|null
	 */
	private function getNewActorId(): ?int {
		$name = $this->getOption( 'overwrite-with' );

		if ( $name === null ) {
			return null;
		}

		$user = $this->userFactory->newFromName( $name );

		if ( !$user ) {
			$this->fatalError( "Not a valid user name: '$name'" );
		}

		$name = $this->userNameUtils->getCanonical( $name, UserRigorOptions::RIGOR_NONE );

		if ( $user->isRegistered() ) {
			$this->output( "Using existing user: '$user'\n" );
		} elseif ( !$this->userNameUtils->isValid( $name ) ) {
			$this->fatalError( "Not a valid user name: '$name'" );
		} elseif ( !$this->userNameUtils->isUsable( $name ) ) {
			// A valid but unusable name is treated as a system user.
			$this->output( "Using system user: '$name'\n" );
		} else {
			// Valid and usable, but not registered: refuse to invent a new user.
			$this->fatalError( "Unknown user: '$name'" );
		}

		$dbw = $this->getPrimaryDB();
		$actorId = $this->actorNormalization->acquireActorId( $user, $dbw );

		if ( !$actorId ) {
			$this->fatalError( "Failed to acquire an actor ID for user '$user'" );
		}

		$this->output( "Replacement actor ID is $actorId.\n" );

		return $actorId;
	}

	/**
	 * Script entry point.
	 *
	 * Validates the --field and --type options, lists one batch of rows with
	 * invalid actor IDs, and, if --overwrite-with was given and the operator
	 * confirms by typing "yes", overwrites the actor ID in those rows.
	 */
	public function execute() {
		$services = $this->getServiceContainer();
		$this->userFactory = $services->getUserFactory();
		$this->userNameUtils = $services->getUserNameUtils();
		$this->actorNormalization = $services->getActorNormalization();

		$field = $this->getOption( 'field' );
		if ( !$this->getTableInfo( $field ) ) {
			$this->fatalError( "Unknown field: $field.\n" );
		}

		$type = $this->getOption( 'type', 'missing' );
		if ( $type !== 'missing' && $type !== 'broken' ) {
			$this->fatalError( "Unknown type: $type.\n" );
		}

		$skip = $this->parseIntList( $this->getOption( 'skip', '' ) );
		$overwrite = $this->getNewActorId();

		$bad = $this->findBadActors( $field, $type, $skip );

		if ( $bad && $overwrite ) {
			$this->output( "\n" );
			$this->output( "Do you want to OVERWRITE the listed actor IDs?\n" );
			$this->output( "Information about the invalid IDs will be lost!\n" );
			$this->output( "\n" );
			$confirm = self::readconsole( 'Type "yes" to continue: ' );

			if ( $confirm === 'yes' ) {
				// $bad is keyed by row ID; only the keys are needed to address the rows.
				$this->overwriteActorIDs( $field, array_keys( $bad ), $overwrite );
			} else {
				$this->fatalError( 'Aborted.' );
			}
		}

		$this->output( "Done.\n" );
	}

	/**
	 * Find rows that have bad actor IDs.
	 *
	 * At most one batch (see getBatchSize()) of rows is returned per call.
	 *
	 * @param string $field the database field in which to detect bad actor IDs.
	 * @param string $type type of bad actors, missing or broken.
	 * @param int[] $skip bad actor IDs not to replace.
	 *
	 * @return array a map from row ID to the bad actor ID found in that row. The keys
	 *   identify the rows in which the actor ID needs to be replaced.
	 */
	private function findBadActors( $field, $type, $skip ): array {
		[ $table, $actorField, $idField ] = $this->getTableInfo( $field );
		$this->output( "Finding invalid actor IDs in $table.$actorField...\n" );
		$dbr = $this->getServiceContainer()->getDBLoadBalancer()->getConnection( DB_REPLICA, 'vslow' );

		/*
		We are building an SQL query like this one here, performing a left join
		to detect rows in $table that lack a matching row in the actor table.

		In this example, $field is 'log_actor', so $table is 'logging',
		$actorField is 'log_actor', and $idField is 'log_id'.
		Further, $skip is [ 1, 2, 3, 4 ] and the batch size is 1000.

		SELECT log_actor, log_id
		FROM logging
		LEFT JOIN actor ON log_actor = actor_id
		WHERE actor_id IS NULL
		AND log_actor NOT IN (1, 2, 3, 4)
		LIMIT 1000;

		When $type is 'broken' rather than 'missing', the first condition is
		actor_name = '' instead of actor_id IS NULL.
		*/

		$queryBuilder = $dbr->newSelectQueryBuilder()
			->select( [ $actorField, $idField ] )
			->from( $table )
			->leftJoin( 'actor', null, [ "$actorField = actor_id" ] )
			->where( $type === 'missing' ? [ 'actor_id' => null ] : [ 'actor_name' => '' ] )
			->limit( $this->getBatchSize() );

		if ( $skip ) {
			$queryBuilder->andWhere( $dbr->expr( $actorField, '!=', $skip ) );
		}

		$res = $queryBuilder->caller( __METHOD__ )->fetchResultSet();
		$count = $res->numRows();

		$bad = [];

		if ( $count ) {
			$this->output( "\t\tID\tACTOR\n" );
		}

		foreach ( $res as $row ) {
			$id = $row->$idField;
			$actor = (int)( $row->$actorField );
			$bad[$id] = $actor;
			$this->output( "\t\t$id\t$actor\n" );
		}

		$this->output( "\tFound $count invalid actor IDs.\n" );

		if ( $count >= $this->getBatchSize() ) {
			$this->output( "\tBatch size reached, run again after fixing the current batch.\n" );
		}

		return $bad;
	}

	/**
	 * Overwrite the actor ID in a given set of rows.
	 *
	 * @param string $field the database field in which to replace IDs.
	 * @param array $ids The row IDs of the rows in which the actor ID should be replaced
	 * @param int $overwrite The actor ID to write to the rows identified by $ids.
	 *
	 * @return int the number of rows affected by the update; 0 if $ids is empty.
	 */
	private function overwriteActorIDs( $field, array $ids, int $overwrite ): int {
		if ( !$ids ) {
			// Nothing to update; also avoids building a condition from an empty ID list.
			return 0;
		}

		[ $table, $actorField, $idField ] = $this->getTableInfo( $field );

		$count = count( $ids );
		$this->output( "OVERWRITING $count actor IDs in $table.$actorField with $overwrite...\n" );

		$dbw = $this->getPrimaryDB();

		$dbw->newUpdateQueryBuilder()
			->update( $table )
			->set( [ $actorField => $overwrite ] )
			->where( [ $idField => $ids ] )
			->caller( __METHOD__ )->execute();

		// Report what the database actually changed, not how many IDs were requested.
		$count = $dbw->affectedRows();

		$this->waitForReplication();
		$this->output( "\tUpdated $count rows.\n" );

		return $count;
	}
}

// @codeCoverageIgnoreStart
$maintClass = FindMissingActors::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd